def lmdb_dataflow(lmdb_path, batch_size, sample_size, is_training, test_speed=False, train_perturb_list=None, valid_perturb_list=None, so3_perturb=False, use_partial=False):
    """Build a tensorpack dataflow over an LMDB dataset.

    The pipeline is: load LMDB -> (train only: local shuffle + prefetch) ->
    PreprocessData -> (train only: ZMQ prefetch across 8 workers) -> batch ->
    repeat forever. The dataflow is reset and ready to iterate on return.

    Args:
        lmdb_path: path to the serialized LMDB file.
        batch_size: number of datapoints per batch.
        sample_size: point-sample size forwarded to PreprocessData.
        is_training: enables shuffling and multi-process prefetching.
        test_speed: if True, run a 1000-iteration throughput benchmark first.
        train_perturb_list / valid_perturb_list / so3_perturb / use_partial:
            forwarded verbatim to PreprocessData (semantics defined there).

    Returns:
        (dataflow_object, dataset_size) where dataset_size is the raw number
        of datapoints in the LMDB (before batching/repeating).
    """
    pipeline = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    num_points = pipeline.size()
    if is_training:
        # Buffered local shuffle; full-dataset shuffle is too expensive on LMDB.
        pipeline = dataflow.LocallyShuffleData(pipeline, buffer_size=2000)
        pipeline = dataflow.PrefetchData(pipeline, nr_prefetch=500, nr_proc=1)
    pipeline = PreprocessData(pipeline, sample_size, is_training,
                              train_perturb_list=train_perturb_list,
                              valid_perturb_list=valid_perturb_list,
                              so3_perturb=so3_perturb,
                              use_partial=use_partial)
    if is_training:
        pipeline = dataflow.PrefetchDataZMQ(pipeline, nr_proc=8)
    pipeline = dataflow.BatchData(pipeline, batch_size, use_list=True)
    # -1 makes the dataflow repeat indefinitely (epoch handling is external).
    pipeline = dataflow.RepeatedData(pipeline, -1)
    if test_speed:
        dataflow.TestDataSpeed(pipeline, size=1000).start()
    pipeline.reset_state()
    return pipeline, num_points
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """Build a tensorpack dataflow over an LMDB dataset.

    Pipeline: load LMDB -> (train only: local shuffle + prefetch) ->
    BatchData (project-local batching/resampling) -> (train only: ZMQ
    prefetch across 8 workers) -> repeat forever. The dataflow is reset
    and ready to iterate on return.

    Args:
        lmdb_path: path to the serialized LMDB file.
        batch_size: number of datapoints per batch.
        input_size: per-sample input point count forwarded to BatchData.
        output_size: per-sample output point count forwarded to BatchData.
        is_training: enables shuffling and multi-process prefetching.
        test_speed: if True, run a 1000-iteration throughput benchmark first.

    Returns:
        (dataflow_object, dataset_size) where dataset_size is the raw
        number of datapoints in the LMDB (before batching/repeating).
    """
    # NOTE: previously contained commented-out loads of hard-coded absolute
    # paths (/home/cuda/Alex/PC-NBV/...); removed — the path is a parameter.
    df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    size = df.size()
    if is_training:
        # Buffered local shuffle; full-dataset shuffle is too expensive on LMDB.
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
        df = dataflow.PrefetchData(df, num_prefetch=500, num_proc=1)
    df = BatchData(df, batch_size, input_size, output_size)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, num_proc=8)
    # -1 makes the dataflow repeat indefinitely (epoch handling is external).
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """Load an LMDB dataset and wrap it in a batched, repeating dataflow.

    Training mode adds a buffered local shuffle plus single- and
    multi-process prefetching; validation mode streams the data in
    storage order. The returned dataflow has already been reset.

    Args:
        lmdb_path: path to the serialized LMDB file.
        batch_size: datapoints per emitted batch.
        input_size: per-sample input point count forwarded to BatchData.
        output_size: per-sample output point count forwarded to BatchData.
        is_training: enables shuffling and multi-process prefetching.
        test_speed: if True, benchmark 1000 iterations before returning.

    Returns:
        (dataflow_object, dataset_size): the pipeline and the raw number
        of datapoints in the LMDB.
    """
    stream = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    dataset_size = stream.size()
    if is_training:
        stream = dataflow.LocallyShuffleData(stream, buffer_size=2000)
        stream = dataflow.PrefetchData(stream, nr_prefetch=500, nr_proc=1)
    stream = BatchData(stream, batch_size, input_size, output_size)
    if is_training:
        stream = dataflow.PrefetchDataZMQ(stream, nr_proc=8)
    # Repeat forever; epoch boundaries are managed by the caller.
    stream = dataflow.RepeatedData(stream, -1)
    if test_speed:
        dataflow.TestDataSpeed(stream, size=1000).start()
    stream.reset_state()
    return stream, dataset_size
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """Create a batched, endlessly-repeating dataflow from a raw LMDB.

    Uses the two-stage LMDBData -> LMDBDataPoint loading path (raw
    key/value pairs decoded into datapoints), then applies the
    project-local PreprocessData transform before batching.

    Args:
        lmdb_path: path to the LMDB database.
        batch_size: datapoints per emitted batch.
        input_size: per-sample input point count for PreprocessData.
        output_size: per-sample output point count for PreprocessData.
        is_training: enables shuffling and multi-process prefetching.
        test_speed: if True, benchmark 1000 iterations before returning.

    Returns:
        (dataflow_object, dataset_size): the ready-to-iterate pipeline
        and the raw number of entries in the LMDB.
    """
    flow = dataflow.LMDBData(lmdb_path, shuffle=False)
    total = flow.size()
    if is_training:
        flow = dataflow.LocallyShuffleData(flow, buffer_size=2000)
        flow = dataflow.PrefetchData(flow, nr_prefetch=500, nr_proc=1)
    # Decode raw LMDB values into datapoints, then preprocess.
    flow = dataflow.LMDBDataPoint(flow)
    flow = PreprocessData(flow, input_size, output_size)
    if is_training:
        flow = dataflow.PrefetchDataZMQ(flow, nr_proc=8)
    flow = dataflow.BatchData(flow, batch_size, use_list=True)
    # Repeat forever; epoch boundaries are managed by the caller.
    flow = dataflow.RepeatedData(flow, -1)
    if test_speed:
        dataflow.TestDataSpeed(flow, size=1000).start()
    flow.reset_state()
    return flow, total
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False, filter_rate=0):
    """Build a batched, repeating dataflow from an LMDB dataset.

    Each datapoint is extended with one uniform random float in [0, 1),
    appended as the last element — presumably consumed downstream for
    stochastic filtering against ``filter_rate`` (TODO confirm against
    BatchData / callers; ``filter_rate`` itself is unused here).

    Args:
        lmdb_path: path to the serialized LMDB file.
        batch_size: datapoints per emitted batch.
        input_size: per-sample input point count forwarded to BatchData.
        output_size: per-sample output point count forwarded to BatchData.
        is_training: enables shuffling and multi-process prefetching.
        test_speed: if True, benchmark 1000 iterations before returning.
        filter_rate: accepted for interface compatibility; not used in
            this function.

    Returns:
        (dataflow_object, dataset_size): the pipeline and the raw number
        of datapoints in the LMDB.
    """
    df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    # list(dp) is the idiomatic (and faster) plain copy of the datapoint;
    # the appended random float travels with each sample.
    df = dataflow.MapData(df, lambda dp: list(dp) + [random.random()])
    size = df.size()
    print(size)  # NOTE(review): debug output kept to preserve behavior
    if is_training:
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
        df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1)
    df = BatchData(df, batch_size, input_size, output_size)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, nr_proc=8)
    # -1 makes the dataflow repeat indefinitely (epoch handling is external).
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
] elif train_or_valid in ['valid', 'validation']: synsets = [ line.strip() for line in open(data_path+'/imagenet_2012_validation_synset_labels.txt').readlines() ] self.datapoints = [ [base_path + 'Data/CLS-LOC/val/ILSVRC2012_val_%08d.JPEG' % (i+1), int(self.maps['synset2idx'][synset])] for i, synset in enumerate(synsets) ] else: raise ValueError('train_or_valid=%s is invalid argument must be a set train or valid' % train_or_valid) if __name__ == '__main__': import argparse import tensorpack.dataflow as df parser = argparse.ArgumentParser(description='Imagenet Dataset on Kakao Example') parser.add_argument('--service-code', type=str, required=True, help='licence key') parser.add_argument('--name', type=str, default='train', help='train or valid') args = parser.parse_args() ds = ILSVRC12(args.service_code, args.name).parallel(num_threads=32) if args.name in ['train', 'training']: ds = df.PrefetchDataZMQ(ds, nr_proc=2) df.TestDataSpeed(ds, size=5000).start()
label.append(self.df.iloc[idx]['Fibrosis']) label.append(self.df.iloc[idx]['Widening_Mediastinum']) label.append(self.df.iloc[idx]['Medical_device']) label.append(self.df.iloc[idx]['Fracture']) label.append(self.df.iloc[idx]['No_Finding']) elif self.types == 1: assert self.pathology is not None label.append(self.df.iloc[idx][self.pathology]) else: pass # Try catch exception label = np.nan_to_num(label, copy=True, nan=0) label = np.array(label, dtype=np.float32) types = label.copy() yield [image, types] elif self.is_train == 'test': yield [image] # , np.array([-1, -1, -1, -1, -1]) else: pass if __name__ == '__main__': ds = Vinmec(folder='/u01/data/Vimmec_Data_small/', train_or_valid='train', resize=256) ds.reset_state() # ds = df.MultiProcessRunnerZMQ(ds, num_proc=8) ds = df.BatchData(ds, 32) # ds = df.PrintData(ds) df.TestDataSpeed(ds).start()