예제 #1
0
def lmdb_dataflow(lmdb_path,
                  batch_size,
                  sample_size,
                  is_training,
                  test_speed=False,
                  train_perturb_list=None,
                  valid_perturb_list=None,
                  so3_perturb=False,
                  use_partial=False):
    """Assemble a preprocessed, batched, repeating dataflow from an LMDB file.

    Args:
        lmdb_path: Path handed to ``dataflow.LMDBSerializer.load``.
        batch_size: Batch size handed to ``dataflow.BatchData``
            (called with ``use_list=True``).
        sample_size: Forwarded positionally to ``PreprocessData``.
        is_training: When truthy, adds a local shuffle stage in front of the
            prefetcher and a ``PrefetchDataZMQ`` stage after preprocessing.
            It is also forwarded positionally to ``PreprocessData``.
        test_speed: When truthy, runs ``dataflow.TestDataSpeed`` with
            ``size=1000`` on the finished pipeline before returning.
        train_perturb_list: Forwarded to ``PreprocessData``.
        valid_perturb_list: Forwarded to ``PreprocessData``.
        so3_perturb: Forwarded to ``PreprocessData``.
        use_partial: Forwarded to ``PreprocessData``.

    Returns:
        A ``(dataflow, size)`` tuple. ``size`` is what the freshly loaded
        LMDB dataflow reports, taken before any wrapper is applied.
    """
    source = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    num_samples = source.size()

    # The LMDB is loaded unshuffled; training shuffles within a 2000-item buffer.
    stream = source
    if is_training:
        stream = dataflow.LocallyShuffleData(stream, buffer_size=2000)
    stream = dataflow.PrefetchData(stream, nr_prefetch=500, nr_proc=1)

    preprocess_options = {
        'train_perturb_list': train_perturb_list,
        'valid_perturb_list': valid_perturb_list,
        'so3_perturb': so3_perturb,
        'use_partial': use_partial,
    }
    stream = PreprocessData(stream, sample_size, is_training, **preprocess_options)

    # Only the training pipeline gets the 8-process ZMQ prefetch stage.
    if is_training:
        stream = dataflow.PrefetchDataZMQ(stream, nr_proc=8)

    stream = dataflow.BatchData(stream, batch_size, use_list=True)
    stream = dataflow.RepeatedData(stream, -1)  # -1: repeat without end

    if test_speed:
        # NOTE(review): the flow is reset again right after this benchmark;
        # confirm the multi-process stages tolerate that.
        dataflow.TestDataSpeed(stream, size=1000).start()

    stream.reset_state()
    return stream, num_samples
예제 #2
0
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """Create a batched, repeating dataflow over the records of an LMDB file.

    Args:
        lmdb_path: Path handed to ``dataflow.LMDBSerializer.load``.
        batch_size: Forwarded to ``BatchData``.
        input_size: Forwarded to ``BatchData``.
        output_size: Forwarded to ``BatchData``.
        is_training: When truthy, the records are locally shuffled and
            prefetched before batching, and the batches go through a
            ``PrefetchDataZMQ`` stage afterwards.
        test_speed: When truthy, runs ``dataflow.TestDataSpeed`` with
            ``size=1000`` on the finished pipeline before returning.

    Returns:
        A ``(dataflow, size)`` tuple. ``size`` is what the freshly loaded
        LMDB dataflow reports, taken before any wrapper is applied.
    """
    pipeline = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    num_records = pipeline.size()

    if is_training:
        # This code uses the `num_prefetch` / `num_proc` keyword spelling;
        # the `nr_prefetch` / `nr_proc` spelling was tried and left disabled.
        pipeline = dataflow.PrefetchData(
            dataflow.LocallyShuffleData(pipeline, buffer_size=2000),
            num_prefetch=500,
            num_proc=1,
        )

    pipeline = BatchData(pipeline, batch_size, input_size, output_size)

    if is_training:
        pipeline = dataflow.PrefetchDataZMQ(pipeline, num_proc=8)

    pipeline = dataflow.RepeatedData(pipeline, -1)  # -1: repeat without end

    if test_speed:
        dataflow.TestDataSpeed(pipeline, size=1000).start()

    pipeline.reset_state()
    return pipeline, num_records
예제 #3
0
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """Load an LMDB file and wrap it into a batched, repeating dataflow.

    Args:
        lmdb_path: Path handed to ``dataflow.LMDBSerializer.load``.
        batch_size: Forwarded to ``BatchData``.
        input_size: Forwarded to ``BatchData``.
        output_size: Forwarded to ``BatchData``.
        is_training: When truthy, adds local shuffling and a prefetch stage
            before batching, plus a ``PrefetchDataZMQ`` stage after batching.
        test_speed: When truthy, runs ``dataflow.TestDataSpeed`` with
            ``size=1000`` on the finished pipeline before returning.

    Returns:
        A ``(dataflow, size)`` tuple. ``size`` is what the freshly loaded
        LMDB dataflow reports, taken before any wrapper is applied.
    """
    records = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    total = records.size()

    stream = records
    if is_training:
        # Shuffle within a 2000-item buffer, then prefetch 500 items in one process.
        shuffled = dataflow.LocallyShuffleData(stream, buffer_size=2000)
        stream = dataflow.PrefetchData(shuffled, nr_prefetch=500, nr_proc=1)

    batched = BatchData(stream, batch_size, input_size, output_size)
    if is_training:
        batched = dataflow.PrefetchDataZMQ(batched, nr_proc=8)

    endless = dataflow.RepeatedData(batched, -1)  # -1: repeat without end
    if test_speed:
        dataflow.TestDataSpeed(endless, size=1000).start()

    endless.reset_state()
    return endless, total
예제 #4
0
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """Build a preprocessed, batched, repeating dataflow from raw LMDB data.

    Args:
        lmdb_path: Path handed to ``dataflow.LMDBData``.
        batch_size: Batch size handed to ``dataflow.BatchData``
            (called with ``use_list=True``).
        input_size: Forwarded to ``PreprocessData``.
        output_size: Forwarded to ``PreprocessData``.
        is_training: When truthy, adds a local shuffle stage in front of the
            prefetcher and a ``PrefetchDataZMQ`` stage after preprocessing.
        test_speed: When truthy, runs ``dataflow.TestDataSpeed`` with
            ``size=1000`` on the finished pipeline before returning.

    Returns:
        A ``(dataflow, size)`` tuple. ``size`` is what the ``LMDBData``
        object reports, taken before any wrapper is applied.
    """
    raw = dataflow.LMDBData(lmdb_path, shuffle=False)
    num_entries = raw.size()

    flow = dataflow.LocallyShuffleData(raw, buffer_size=2000) if is_training else raw
    # Unlike the shuffle above, this prefetch stage is applied in both modes.
    flow = dataflow.PrefetchData(flow, nr_prefetch=500, nr_proc=1)
    flow = PreprocessData(dataflow.LMDBDataPoint(flow), input_size, output_size)

    if is_training:
        flow = dataflow.PrefetchDataZMQ(flow, nr_proc=8)

    flow = dataflow.RepeatedData(
        dataflow.BatchData(flow, batch_size, use_list=True),
        -1,  # repeat without end
    )

    if test_speed:
        dataflow.TestDataSpeed(flow, size=1000).start()

    flow.reset_state()
    return flow, num_entries
예제 #5
0
def lmdb_dataflow(lmdb_path,
                  batch_size,
                  input_size,
                  output_size,
                  is_training,
                  test_speed=False,
                  filter_rate=0):
    """Build a batched, repeating dataflow whose datapoints carry a random tag.

    Each datapoint loaded from the LMDB file is converted to a list and gets
    one extra trailing element drawn from ``random.random()``.

    Args:
        lmdb_path: Path handed to ``dataflow.LMDBSerializer.load``.
        batch_size: Forwarded to ``BatchData``.
        input_size: Forwarded to ``BatchData``.
        output_size: Forwarded to ``BatchData``.
        is_training: When truthy, adds local shuffling and a prefetch stage
            before batching, plus a ``PrefetchDataZMQ`` stage after batching.
        test_speed: When truthy, runs ``dataflow.TestDataSpeed`` with
            ``size=1000`` on the finished pipeline before returning.
        filter_rate: Accepted but not read anywhere in this function.
            NOTE(review): presumably paired with the random tag by a
            consumer elsewhere; confirm.

    Returns:
        A ``(dataflow, size)`` tuple. ``size`` is what the mapped dataflow
        reports, taken before shuffling, batching or repetition.
    """
    def _with_random_tag(dp):
        # Copy the datapoint's items, then append one fresh random number.
        return [*dp, random.random()]

    tagged = dataflow.MapData(
        dataflow.LMDBSerializer.load(lmdb_path, shuffle=False),
        _with_random_tag,
    )

    num_records = tagged.size()
    print(num_records)

    stream = tagged
    if is_training:
        stream = dataflow.LocallyShuffleData(stream, buffer_size=2000)
        stream = dataflow.PrefetchData(stream, nr_prefetch=500, nr_proc=1)

    stream = BatchData(stream, batch_size, input_size, output_size)
    if is_training:
        stream = dataflow.PrefetchDataZMQ(stream, nr_proc=8)

    stream = dataflow.RepeatedData(stream, -1)  # -1: repeat without end
    if test_speed:
        dataflow.TestDataSpeed(stream, size=1000).start()

    stream.reset_state()
    return stream, num_records
예제 #6
0
            ]
        elif train_or_valid in ['valid', 'validation']:
            synsets = [
                line.strip()
                for line in open(data_path+'/imagenet_2012_validation_synset_labels.txt').readlines()
            ]
            self.datapoints = [
                [base_path + 'Data/CLS-LOC/val/ILSVRC2012_val_%08d.JPEG' % (i+1), int(self.maps['synset2idx'][synset])]
                for i, synset in enumerate(synsets)
            ]
        else:
            raise ValueError('train_or_valid=%s is invalid argument must be a set train or valid' % train_or_valid)


if __name__ == '__main__':
    import argparse
    import tensorpack.dataflow as df

    # Command line: --service-code is mandatory, --name defaults to 'train'.
    parser = argparse.ArgumentParser(description='Imagenet Dataset on Kakao Example')
    parser.add_argument('--service-code', required=True, type=str, help='licence key')
    parser.add_argument('--name', default='train', type=str, help='train or valid')
    args = parser.parse_args()

    # Build the dataset, then call its .parallel() with 32 threads.
    ds = ILSVRC12(args.service_code, args.name)
    ds = ds.parallel(num_threads=32)

    # Only the training split gets the extra two-process ZMQ prefetch stage.
    if args.name in ('train', 'training'):
        ds = df.PrefetchDataZMQ(ds, nr_proc=2)

    # Benchmark the resulting dataflow with size=5000.
    df.TestDataSpeed(ds, size=5000).start()
예제 #7
0
                    label.append(self.df.iloc[idx]['Fibrosis'])
                    label.append(self.df.iloc[idx]['Widening_Mediastinum'])
                    label.append(self.df.iloc[idx]['Medical_device'])
                    label.append(self.df.iloc[idx]['Fracture'])
                    label.append(self.df.iloc[idx]['No_Finding'])
                elif self.types == 1:
                    assert self.pathology is not None
                    label.append(self.df.iloc[idx][self.pathology])
                else:
                    pass
                # Try catch exception
                label = np.nan_to_num(label, copy=True, nan=0)
                label = np.array(label, dtype=np.float32)
                types = label.copy()
                yield [image, types]
            elif self.is_train == 'test':
                yield [image]  # , np.array([-1, -1, -1, -1, -1])
            else:
                pass


if __name__ == '__main__':
    # Manual speed check of the Vinmec dataflow on a hard-coded local folder.
    ds = Vinmec(
        folder='/u01/data/Vimmec_Data_small/',
        train_or_valid='train',
        resize=256,
    )
    ds.reset_state()

    # Stages that were tried and left disabled:
    #   ds = df.MultiProcessRunnerZMQ(ds, num_proc=8)   (before batching)
    #   ds = df.PrintData(ds)                           (after batching)
    ds = df.BatchData(ds, 32)

    # Benchmark the batched dataflow with TestDataSpeed's default settings.
    df.TestDataSpeed(ds).start()