Example #1
    def __init__(self,
                 mode,
                 batch_size=256,
                 shuffle=False,
                 num_workers=25,
                 cache=50000,
                 collate_fn=default_collate,
                 drop_last=False,
                 cuda=False):
        # enumerate standard imagenet augmentors
        imagenet_augmentors = fbresnet_augmentor(mode == 'train')

        # load the lmdb if we can find it
        lmdb_loc = os.path.join(os.environ['IMAGENET'],
                                'ILSVRC-%s.lmdb' % mode)
        ds = td.LMDBData(lmdb_loc, shuffle=False)
        ds = td.LocallyShuffleData(ds, cache)
        ds = td.PrefetchData(ds, 5000, 1)
        ds = td.LMDBDataPoint(ds)
        ds = td.MapDataComponent(ds,
                                 lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR),
                                 0)
        ds = td.AugmentImageComponent(ds, imagenet_augmentors)
        ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = td.BatchData(ds, batch_size)
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.cuda = cuda
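
The snippets in these examples show only the __init__ of a loader class. A minimal sketch (not from any of the sources above; the class name and methods are illustrative) of how such a wrapper is typically completed, following the PyTorch DataLoader convention:

import tensorpack.dataflow as td

class TensorpackLoader:
    def __init__(self, ds, batch_size):
        self.ds = td.BatchData(ds, batch_size)  # same final step as above
        self.ds.reset_state()                   # must be called before iterating
        self.batch_size = batch_size

    def __iter__(self):
        # each dp is a list of batched components, e.g. [images, labels]
        for dp in self.ds.get_data():
            yield dp

    def __len__(self):
        return self.ds.size()
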
Example #2
    def __init__(self,
                 mode,
                 batch_size=256,
                 shuffle=False,
                 num_workers=25,
                 cache=50000,
                 collate_fn=default_collate,
                 remainder=False,
                 cuda=False,
                 transform=None):
        # enumerate standard imagenet augmentors
        #imagenet_augmentors = fbresnet_augmentor(mode == 'train')
        imagenet_augmentors = [ImgAugTVCompose(transform)]

        # load the lmdb if we can find it
        lmdb_loc = os.path.join(os.environ['IMAGENET'],
                                'ILSVRC-%s.lmdb' % mode)
        ds = td.LMDBData(lmdb_loc, shuffle=False)
        if mode == 'train':
            ds = td.LocallyShuffleData(ds, cache)
        ds = td.PrefetchData(ds, 5000, 1)
        ds = td.LMDBDataPoint(ds)
        #ds = td.MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
        ds = td.MapDataComponent(
            ds, lambda x: np.asarray(Image.open(io.BytesIO(x)).convert('RGB')),
            0)
        ds = td.AugmentImageComponent(ds, imagenet_augmentors)
        ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = td.BatchData(ds, batch_size, remainder=remainder)
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.cuda = cuda
Example #3
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    size = df.size()
    if is_training:
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
        df = dataflow.PrefetchData(df, num_prefetch=500, num_proc=1)
    df = BatchData(df, batch_size, input_size, output_size)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, num_proc=8)
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
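
Because RepeatedData(df, -1) makes the flow infinite, callers pull a fixed number of batches from the generator instead of looping over epochs. A usage sketch (the path and sizes are placeholders, not from the original source):

train_df, num_samples = lmdb_dataflow('data/train.lmdb', batch_size=32,
                                      input_size=2048, output_size=16384,
                                      is_training=True)
batches = train_df.get_data()        # infinite generator over batches
for step in range(num_samples // 32):
    batch = next(batches)            # never raises StopIteration
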
Example #4
def lmdb_dataflow(lmdb_path,
                  batch_size,
                  sample_size,
                  is_training,
                  test_speed=False,
                  train_perturb_list=None,
                  valid_perturb_list=None,
                  so3_perturb=False,
                  use_partial=False):
    df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    size = df.size()
    if is_training:
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
    df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1)
    df = PreprocessData(df,
                        sample_size,
                        is_training,
                        train_perturb_list=train_perturb_list,
                        valid_perturb_list=valid_perturb_list,
                        so3_perturb=so3_perturb,
                        use_partial=use_partial)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, nr_proc=8)
    df = dataflow.BatchData(df, batch_size, use_list=True)
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
Example #5
def data_pipe(fmri_files,
              confound_files,
              label_matrix,
              target_name=None,
              batch_size=32,
              data_type='train',
              train_percent=0.8,
              nr_thread=nr_thread,
              buffer_size=buffer_size):
    assert data_type in ['train', 'val', 'test']
    assert fmri_files is not None

    print('\n\nGenerating dataflow for %s datasets \n' % data_type)

    buffer_size = min(len(fmri_files), buffer_size)
    nr_thread = min(len(fmri_files), nr_thread)

    ds0 = gen_fmri_file(fmri_files,
                        confound_files,
                        label_matrix,
                        data_type=data_type,
                        train_percent=train_percent)
    print('dataflowSize is ' + str(ds0.size()))
    print('Loading data using %d threads with %d buffer_size ... \n' %
          (nr_thread, buffer_size))

    if target_name is None:
        target_name = np.unique(label_matrix)

    ####running the model
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    ds1 = dataflow.MultiThreadMapData(
        ds0,
        nr_thread=nr_thread,
        map_func=lambda dp: map_load_fmri_image(dp, target_name),
        buffer_size=buffer_size,
        strict=True)

    ds1 = dataflow.PrefetchData(ds1, buffer_size, 1)

    ds1 = split_samples(ds1)
    print('prefetch dataflowSize is ' + str(ds1.size()))

    ds1 = dataflow.LocallyShuffleData(ds1,
                                      buffer_size=ds1.size() * buffer_size)

    ds1 = dataflow.BatchData(ds1, batch_size=batch_size)
    print('Time Usage of loading data in seconds: {} \n'.format(time.perf_counter() -
                                                                start_time))

    ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1)
    ds1.reset_state()  # public API; the original called the private _reset_once()

    for df in ds1.get_data():
        yield (np.expand_dims(df[0].astype('float32'), axis=3),
               to_categorical(df[1].astype('int32'), len(target_name)))
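
Since data_pipe is itself a generator yielding (inputs, one_hot_labels) tuples, it can feed a Keras-style training loop directly. A hypothetical usage (the model and sample counts are placeholders, not from the original source):

train_gen = data_pipe(fmri_files, confound_files, label_matrix,
                      batch_size=32, data_type='train')
model.fit_generator(train_gen,
                    steps_per_epoch=num_train_samples // 32,
                    epochs=10)
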
Example #6
    def __init__(self,
                 corpus_path,
                 tokenizer,
                 seq_len,
                 encoding="utf-8",
                 predict_feature=False,
                 batch_size=512,
                 shuffle=False,
                 num_workers=25,
                 cache=10000,
                 drop_last=False,
                 cuda=False,
                 distributed=False,
                 visualization=False,
                 span_mask=False,
                 cond_mask=False,
                 region_len=36):

        if dist.is_available() and distributed:
            rank = dist.get_rank()
            lmdb_file = os.path.join(
                corpus_path, "training_feat_part_" + str(rank) + ".lmdb")
        else:
            lmdb_file = os.path.join(corpus_path, "training_feat_all.lmdb")

        caption_path = os.path.join(corpus_path, "caption_train.json")

        print("Loading from %s" % lmdb_file)

        os.listdir(corpus_path)

        ds = td.LMDBSerializer.load(lmdb_file, shuffle=False)
        self.num_dataset = len(ds)

        self.cond_mask = cond_mask

        preprocess_function = BertPreprocessBatch(
            caption_path,
            tokenizer,
            seq_len,
            region_len,
            self.num_dataset,
            encoding="utf-8",
            predict_feature=predict_feature,
            span_mask=span_mask,
            cond_mask=cond_mask)

        # ds = td.LocallyShuffleData(ds, cache)
        ds = td.PrefetchData(ds, 5000, 1)
        ds = td.MapData(ds, preprocess_function)
        # self.ds = td.PrefetchData(ds, 1)
        ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = td.BatchData(ds, batch_size)
        # self.ds = ds
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
Example #7
    def __init__(
        self,
        corpus_path,
        tokenizer,
        seq_len,
        encoding="utf-8",
        predict_feature=False,
        hard_negative=False,
        batch_size=512,
        shuffle=False,
        num_workers=25,
        cache=50000,
        drop_last=False,
        cuda=False,
        distributed=False,
        visualization=False,
    ):

        if dist.is_available() and distributed:
            num_replicas = dist.get_world_size()
            # assert num_replicas == 8
            rank = dist.get_rank()
            lmdb_file = "/coc/dataset/conceptual_caption/training_feat_part_" + str(rank) + ".lmdb"
            # if not os.path.exists(lmdb_file):
            # lmdb_file = "/srv/share/datasets/conceptual_caption/training_feat_part_" + str(rank) + ".lmdb"
        else:
            # lmdb_file = "/coc/dataset/conceptual_caption/training_feat_all.lmdb"
            # if not os.path.exists(lmdb_file):
            lmdb_file = "/coc/pskynet2/jlu347/multi-modal-bert/data/conceptual_caption/training_feat_all.lmdb"
            
        caption_path = "/coc/pskynet2/jlu347/multi-modal-bert/data/conceptual_caption/caption_train.json"
        print("Loading from %s" % lmdb_file)

        ds = td.LMDBSerializer.load(lmdb_file, shuffle=False)
        self.num_dataset = len(ds)

        preprocess_function = BertPreprocessBatch(
            caption_path,
            tokenizer,
            seq_len,
            36,
            self.num_dataset,
            encoding="utf-8",
            predict_feature=predict_feature,
        )

        ds = td.LocallyShuffleData(ds, cache)
        ds = td.PrefetchData(ds, 5000, 1)
        ds = td.MapData(ds, preprocess_function)
        # self.ds = td.PrefetchData(ds, 1)
        ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = td.BatchData(ds, batch_size)
        # self.ds = ds
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
Example #8
    def __init__(self,
                 datafile,
                 batch_size,
                 num_workers=1,
                 nviews=12,
                 reset=True,
                 augment=False,
                 filter_classes=None,
                 filter_views=None,
                 polarmode='cartesian',
                 shuffle=True,
                 filter_ids=None,
                 label_to0idx=False,
                 rgb=False,
                 force_res=0,
                 autocrop=False,
                 keep_aspect_ratio=False):
        self.filter_classes = filter_classes
        self.filter_views = filter_views
        self.filter_ids = filter_ids
        self.polarmode = polarmode
        self.label_to0idx = label_to0idx
        self.rgb = rgb
        self.force_res = force_res
        self.autocrop = autocrop
        self.keep_aspect_ratio = keep_aspect_ratio

        if not isinstance(datafile, list):
            datafile = [datafile]

        ds = []
        for d in datafile:
            ds.append(df.LMDBSerializer.load(d, shuffle=shuffle))

            if shuffle:
                ds[-1] = df.LocallyShuffleData(ds[-1], 100)
            ds[-1] = df.PrefetchData(ds[-1], 20, 1)

            ds[-1] = df.MapData(ds[-1], self.load)
            if augment:
                ds[-1] = df.MapDataComponent(ds[-1], LMDBMultiView._augment, 0)

            if (not filter_classes and not filter_ids and num_workers > 1):
                # warning: skipping this is slower when filtering datasets
                #          but epoch counting will be wrong otherwise
                ds[-1] = df.PrefetchDataZMQ(ds[-1], num_workers)
            ds[-1] = df.BatchData(ds[-1], batch_size)

            if reset:
                ds[-1].reset_state()

        self.ds = ds
Example #9
    def __init__(
        self,
        annotations_path,
        features_path,
        tokenizer,
        bert_model,
        seq_len,
        batch_size=512,
        num_workers=25,
        cache=10000,
        local_rank=-1,
        objective=0,
        num_locs=5,
        add_global_imgfeat=None,
    ):

        if dist.is_available() and local_rank != -1:
            rank = dist.get_rank()
            lmdb_file = os.path.join(
                features_path, "training_feat_part_" + str(rank) + ".lmdb")
        else:
            lmdb_file = os.path.join(features_path, "training_feat_all.lmdb")

        print("Loading from %s" % lmdb_file)

        ds = td.LMDBSerializer.load(lmdb_file, shuffle=False)
        self.num_dataset = len(ds)
        ds = td.LocallyShuffleData(ds, cache)
        caption_path = os.path.join(annotations_path, "caption_train.json")

        preprocess_function = BertPreprocessBatch(
            caption_path,
            tokenizer,
            bert_model,
            seq_len,
            36,
            self.num_dataset,
            objective=objective,
            num_locs=num_locs,
        )

        ds = td.PrefetchData(ds, 5000, 1)
        ds = td.MapData(ds, preprocess_function)
        ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = td.BatchData(ds, batch_size)
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.add_global_imgfeat = add_global_imgfeat
        self.num_locs = num_locs
Example #10
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    """load LMDB files, then generate batches??"""
    df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    size = df.size()
    if is_training:
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
        df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1)  # prefetch in a separate process
    df = BatchData(df, batch_size, input_size, output_size)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, nr_proc=8)
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
Example #11
def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
    df = dataflow.LMDBData(lmdb_path, shuffle=False)
    size = df.size()
    if is_training:
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
    df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1)
    df = dataflow.LMDBDataPoint(df)
    df = PreprocessData(df, input_size, output_size)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, nr_proc=8)
    df = dataflow.BatchData(df, batch_size, use_list=True)
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
Example #12
def init_dataflow(ctfstar, batch_size):
    '''Create a dataflow that reads and preprocesses data in parallel.'''
    augm = df.imgaug.AugmentorList([df.imgaug.MeanVarianceNormalize()])
    # create partitioned generators, one for each element in a batch
    dss0, shape = MicrosGenerator.create_partition(ctfstar, batch_size)
    # preprocess input (psize and bn are module-level globals in the original source)
    dss1 = [df.MapData(ds0,
                       lambda dp: [augm.augment(preprocess_micro(dp[0], dp[1], psize, bn)),
                                   np.array(dp[0])])
            for ds0 in dss0]
    # prefetch each generator in a separate process with a buffer of 4 images per process
    # dss1 = [df.PrefetchDataZMQ(ds1, nr_proc=1, hwm=2) for ds1 in dss1]
    dss1 = [df.PrefetchData(ds1, nr_prefetch=4, nr_proc=1) for ds1 in dss1]
    # join all dataflows
    ds1 = df.RandomMixData(dss1)
    # ds1 = df.JoinData(dss1)
    ds = df.BatchData(ds1, batch_size)
    ds.reset_state()
    return ds, shape
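
A usage sketch for the function above (the .star path is a placeholder; the datapoint layout follows the MapData call: a normalized micrograph crop plus its source entry):

ds, shape = init_dataflow('micrographs_ctf.star', batch_size=16)
for micros, ids in ds.get_data():
    pass  # micros: (16, *shape) normalized crops; ids: their source entries
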
Example #13
    def __init__(self, config, dataset_mode):
        """Set the path for Data."""
        self.data_folder = config.data_folder
        self.num_input_points = config.num_input_points
        self.num_gt_points = config.num_gt_points
        self.dataset_mode = dataset_mode

        print(self.data_folder + self.dataset_mode + '.lmdb')

        self.df = dataflow.LMDBSerializer.load(self.data_folder +
                                               self.dataset_mode + '.lmdb',
                                               shuffle=False)
        if config.mode == "train":
            self.df = dataflow.LocallyShuffleData(self.df, buffer_size=2000)
        self.df = dataflow.PrefetchData(self.df, nr_prefetch=500, nr_proc=1)
        self.df.reset_state()
Example #14
    def __init__(self, split, batch_size, set_size):
        if split == 'train':
            lmdb_path = f'{data_path}/ModelNet40_train_1024_middle.lmdb'
        else:
            lmdb_path = f'{data_path}/ModelNet40_test_1024_middle.lmdb'
        df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
        self.size = df.size()
        self.num_batches = self.size // batch_size
        if split == 'train':
            df = dataflow.LocallyShuffleData(df, buffer_size=2000)
            df = dataflow.PrefetchData(df, num_prefetch=500, num_proc=1)
        df = BatchData(df, batch_size, set_size // 8, set_size - set_size // 8)
        if split == 'train':
            df = dataflow.PrefetchDataZMQ(df, num_proc=8)
        df = dataflow.RepeatedData(df, -1)
        df.reset_state()
        self.generator = df.get_data()
Example #15
def lmdb_dataflow(lmdb_path,
                  batch_size,
                  input_size,
                  output_size,
                  is_training,
                  test_speed=False,
                  filter_rate=0):
    df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
    df = dataflow.MapData(df,
                          lambda dp: [item for item in dp] + [random.random()])

    size = df.size()
    print(size)
    if is_training:
        df = dataflow.LocallyShuffleData(df, buffer_size=2000)
        df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1)
    df = BatchData(df, batch_size, input_size, output_size)
    if is_training:
        df = dataflow.PrefetchDataZMQ(df, nr_proc=8)
    df = dataflow.RepeatedData(df, -1)
    if test_speed:
        dataflow.TestDataSpeed(df, size=1000).start()
    df.reset_state()
    return df, size
Example #16
import tensorpack.dataflow as df

if __name__ == '__main__':
    ds = df.dataset.Mnist('train')
    augmentors = [
        df.imgaug.RandomApplyAug(
            df.imgaug.RandomResize((0.8, 1.2), (0.8, 1.2)), 0.3),
        df.imgaug.RandomApplyAug(df.imgaug.RotationAndCropValid(15), 0.5),
        df.imgaug.RandomApplyAug(
            df.imgaug.SaltPepperNoise(white_prob=0.01, black_prob=0.01), 0.25),
        df.imgaug.Resize((28, 28)),
        df.imgaug.CenterPaste((32, 32)),
        df.imgaug.RandomCrop((28, 28)),
        df.imgaug.MapImage(lambda x: x.reshape(28, 28, 1))
    ]
    ds = df.AugmentImageComponent(ds, augmentors)
    ds = df.BatchData(ds, batch_size=32, remainder=False)
    ds = df.PrefetchData(ds, nr_prefetch=12, nr_proc=2)
    ds = df.PrintData(ds)

    df.send_dataflow_zmq(ds, 'tcp://localhost:2222')
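
A receiver counterpart for the script above (a sketch, assuming the same tensorpack version): RemoteDataZMQ binds the address that send_dataflow_zmq connects to and yields the batches pushed over ZMQ.

import tensorpack.dataflow as df

ds = df.RemoteDataZMQ('tcp://0.0.0.0:2222')
ds.reset_state()
for images, labels in ds.get_data():
    pass  # images: (32, 28, 28, 1) batches; labels: (32,) digit classes
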
Example #17
    def __init__(
        self,
        corpus_path,
        tokenizer,
        bert_model,
        seq_len,
        encoding="utf-8",
        visual_target=0,
        hard_negative=False,
        batch_size=512,
        shuffle=False,
        num_workers=25,
        cache=10000,
        drop_last=False,
        cuda=False,
        local_rank=-1,
        objective=0,
        visualization=False,
    ):
        TRAIN_DATASET_SIZE = 3119449

        if dist.is_available() and local_rank != -1:

            num_replicas = dist.get_world_size()
            rank = dist.get_rank()

            lmdb_file = os.path.join(
                corpus_path, "training_feat_part_" + str(rank) + ".lmdb")
        else:
            lmdb_file = os.path.join(corpus_path,
                                     "gqa_resnext152_faster_rcnn_genome.lmdb")
            # lmdb_file = os.path.join(corpus_path, "validation_feat_all.lmdb")

        print("Loading from %s" % lmdb_file)

        ds = td.LMDBSerializer.load(lmdb_file, shuffle=False)
        self.num_dataset = len(ds)
        ds = td.LocallyShuffleData(ds, cache)
        caption_path = os.path.join(corpus_path, "caption_train.json")
        # caption_path = os.path.join(corpus_path, "caption_val.json")

        preprocess_function = BertPreprocessBatch(
            caption_path,
            tokenizer,
            bert_model,
            seq_len,
            36,
            self.num_dataset,
            encoding="utf-8",
            visual_target=visual_target,
            objective=objective,
        )

        ds = td.PrefetchData(ds, 5000, 1)
        ds = td.MapData(ds, preprocess_function)
        # self.ds = td.PrefetchData(ds, 1)
        ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = td.BatchData(ds, batch_size)
        # self.ds = ds
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
Example #18
def data_pipe_3dcnn_block(fmri_files,
                          confound_files,
                          label_matrix,
                          target_name=None,
                          flag_cnn='3d',
                          block_dura=1,
                          hrf_delay=0,
                          batch_size=32,
                          data_type='train',
                          nr_thread=4,
                          buffer_size=10,
                          dataselect_percent=1.0,
                          seed=814,
                          verbose=0):
    assert data_type in ['train', 'val', 'test']
    assert flag_cnn in ['3d', '2d']
    assert fmri_files is not None
    isTrain = data_type == 'train'
    isVal = data_type == 'val'
    isTest = data_type == 'test'

    buffer_size = int(min(len(fmri_files), buffer_size))
    nr_thread = int(min(len(fmri_files), nr_thread))

    ds0 = gen_fmri_file(fmri_files,
                        confound_files,
                        label_matrix,
                        data_type=data_type,
                        seed=seed)

    if target_name is None:
        target_name = np.unique(label_matrix)
    ##Subject_Num, Trial_Num = np.array(label_matrix).shape

    ####running the model
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    if flag_cnn == '2d':
        ds1 = dataflow.MultiThreadMapData(
            ds0,
            nr_thread=nr_thread,
            map_func=lambda dp: map_load_fmri_image_block(
                dp, target_name, block_dura=block_dura, hrf_delay=hrf_delay),
            buffer_size=buffer_size,
            strict=True)
    elif flag_cnn == '3d':
        ds1 = dataflow.MultiThreadMapData(
            ds0,
            nr_thread=nr_thread,
            map_func=lambda dp: map_load_fmri_image_3d_block(
                dp, target_name, block_dura=block_dura, hrf_delay=hrf_delay),
            buffer_size=buffer_size,
            strict=True)

    ds1 = dataflow.PrefetchData(ds1, buffer_size, 1)  ##1

    ds1 = split_samples(ds1,
                        subject_num=len(fmri_files),
                        batch_size=batch_size,
                        dataselect_percent=dataselect_percent)
    dataflowSize = ds1.size()

    if isTrain:
        if verbose:
            print('%d #Trials/Samples per subject with %d channels in tc' %
                  (ds1.Trial_Num, ds1.Block_dura))
        Trial_Num = ds1.Trial_Num
        ds1 = dataflow.LocallyShuffleData(ds1,
                                          buffer_size=Trial_Num * buffer_size,
                                          shuffle_interval=Trial_Num *
                                          buffer_size // 2)  #//2

    ds1 = dataflow.BatchData(ds1, batch_size=batch_size)

    if verbose:
        print('\n\nGenerating dataflow for %s datasets \n' % data_type)
        print('dataflowSize is ' + str(ds0.size()))
        print('Loading data using %d threads with %d buffer_size ... \n' %
              (nr_thread, buffer_size))
        print('prefetch dataflowSize is ' + str(dataflowSize))

        print('Time Usage of loading data in seconds: {} \n'.format(
            time.perf_counter() - start_time))

    if isTrain:
        ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=nr_thread)  ##1
    else:
        ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1)  ##1
    ##ds1._reset_once()
    ds1.reset_state()

    for df in ds1.get_data():
        yield (df[0].astype('float32'),
               one_hot(df[1],
                       len(target_name) + 1).astype('uint8'))


###end of tensorpack: multithread
##############################################################
Example #19
def data_pipe_3dcnn_block(fmri_files,
                          confound_files,
                          label_matrix,
                          target_name=None,
                          flag_cnn='3d',
                          block_dura=1,
                          batch_size=32,
                          data_type='train',
                          nr_thread=nr_thread,
                          buffer_size=buffer_size):
    assert data_type in ['train', 'val', 'test']
    assert flag_cnn in ['3d', '2d']
    assert fmri_files is not None
    isTrain = data_type == 'train'
    isVal = data_type == 'val'

    print('\n\nGenerating dataflow for %s datasets \n' % data_type)

    buffer_size = int(min(len(fmri_files), buffer_size))
    nr_thread = int(min(len(fmri_files), nr_thread))

    ds0 = gen_fmri_file(fmri_files,
                        confound_files,
                        label_matrix,
                        data_type=data_type)
    print('dataflowSize is ' + str(ds0.size()))
    print('Loading data using %d threads with %d buffer_size ... \n' %
          (nr_thread, buffer_size))

    if target_name is None:
        target_name = np.unique(label_matrix)
    ##Subject_Num, Trial_Num = np.array(label_matrix).shape

    ####running the model
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    if flag_cnn == '2d':
        ds1 = dataflow.MultiThreadMapData(
            ds0,
            nr_thread=nr_thread,
            map_func=lambda dp: map_load_fmri_image_block(
                dp, target_name, block_dura=block_dura),
            buffer_size=buffer_size,
            strict=True)
    elif flag_cnn == '3d':
        ds1 = dataflow.MultiThreadMapData(
            ds0,
            nr_thread=nr_thread,
            map_func=lambda dp: map_load_fmri_image_3d_block(
                dp, target_name, block_dura=block_dura),
            buffer_size=buffer_size,
            strict=True)

    ds1 = dataflow.PrefetchData(ds1, buffer_size, 1)

    ds1 = split_samples(ds1)
    print('prefetch dataflowSize is ' + str(ds1.size()))

    if isTrain:
        print('%d #Trials/Samples per subject with %d channels in tc' %
              (ds1.Trial_Num, ds1.Block_dura))
        Trial_Num = ds1.Trial_Num
        #ds1 = dataflow.LocallyShuffleData(ds1, buffer_size=ds1.size() * buffer_size)
        ds1 = dataflow.LocallyShuffleData(ds1,
                                          buffer_size=Trial_Num * buffer_size,
                                          shuffle_interval=Trial_Num *
                                          buffer_size)  #//2

    ds1 = dataflow.BatchData(ds1, batch_size=batch_size, remainder=True)
    print('Time Usage of loading data in seconds: {} \n'.format(time.perf_counter() -
                                                                start_time))

    ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1)
    #ds1._reset_once()
    ##ds1.reset_state()
    '''
    for df in ds1.get_data():
        if flag_cnn == '2d':
            yield (df[0].astype('float32'),to_categorical(df[1].astype('int32'), len(target_name)))
        elif flag_cnn == '3d':
            yield (df[0].astype('float32'),to_categorical(df[1].astype('int32'), len(target_name)))
    '''
    return ds1