def test_pytorch_iterator_last_batch_pad_last_batch():
    num_gpus = 1
    batch_size = 100

    pipes, data_size = create_pipeline(lambda gpu: COCOReaderPipeline(batch_size=batch_size, num_threads=4, shard_id=gpu, num_gpus=num_gpus,
                                                                      data_paths=data_sets[0], random_shuffle=True, stick_to_shard=False,
                                                                      shuffle_after_epoch=False, pad_last_batch=True), batch_size, num_gpus)

    dali_train_iter = PyTorchIterator(pipes, output_map=["data"], size=pipes[0].epoch_size("Reader"), fill_last_batch=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(img_ids_list) > data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) == 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(next_img_ids_list) > data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) == 1
Example #2
def test_pytorch_iterator_not_fill_last_batch_pad_last_batch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    num_gpus = 1
    batch_size = 100

    pipes, data_size = create_pipeline(lambda gpu: COCOReaderPipeline(batch_size=batch_size, num_threads=4, shard_id=gpu, num_gpus=num_gpus,
                                                                     data_paths=data_sets[0], random_shuffle=False, stick_to_shard=False,
                                                                     shuffle_after_epoch=False, pad_last_batch=True), batch_size, num_gpus)

    dali_train_iter = PyTorchIterator(pipes, output_map=["data"], size=pipes[0].epoch_size("Reader"), fill_last_batch=False, last_batch_padded=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(img_ids_list) == data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) != 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    # there is no mirroring: the last batch is simply cut off,
    # so mirrored_data contains real data
    assert len(next_img_ids_list) == data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) != 1
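
Note: fill_last_batch and last_batch_padded, used in the two tests above, are deprecated in newer DALI releases in favor of last_batch_policy. A minimal sketch of the equivalent iterator construction, assuming a recent DALI version (not part of the original tests):

from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
from nvidia.dali.plugin.base_iterator import LastBatchPolicy

# LastBatchPolicy.FILL corresponds to fill_last_batch=True; PARTIAL with
# last_batch_padded=True corresponds to fill_last_batch=False above.
dali_train_iter = PyTorchIterator(pipes, output_map=["data"],
                                  size=pipes[0].epoch_size("Reader"),
                                  last_batch_policy=LastBatchPolicy.PARTIAL,
                                  last_batch_padded=True)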
Example #3
    def update(self, resolution_level, batch_size):
        '''Rebuild the DALI pipeline for a new resolution level.

        :param resolution_level: integer in [2, 8]; the image size is
            2 ** resolution_level (2->4, 3->8, 4->16, 5->32, 6->64,
            7->128, 8->256)
        :param batch_size: batch size passed to the pipeline
        :return: None
        '''
        assert 2 <= resolution_level <= 8, "res error"
        self.batchsize = int(self.batch_table[pow(2, resolution_level)])
        self.pipeline = TFRecordPipeline(
            batch_size=batch_size,
            size=pow(2, resolution_level),
            num_threads=4,
            device_id=self.device_idx,
            path="/home/guyuchao/ssd/dataset/lsun-master/lsun_torch_tfrecord2/-r%02d.tfrecords"
                 % resolution_level,
            index_path="/home/guyuchao/ssd/dataset/lsun-master/lsun_torch_tfrecord2/-r%02d.idx"
                 % resolution_level)
        self.pipeline.build()

        self.dali_iter = DALIGenericIterator(
            [self.pipeline], ["image/encoded"],
            self.pipeline.epoch_size("Reader"),
            auto_reset=True)
Example #4
    def __init__(self,
                 dali_pipelines,
                 transcripts,
                 tokenizer,
                 batch_size,
                 shard_size,
                 pipeline_type,
                 normalize_transcripts=False):
        self.normalize_transcripts = normalize_transcripts
        self.tokenizer = tokenizer
        self.batch_size = batch_size
        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        # in the train pipeline shard_size is set to be divisible by batch_size,
        # so the PARTIAL policy is safe
        if pipeline_type == 'val':
            self.dali_it = DALIGenericIterator(
                dali_pipelines, ["audio", "label", "audio_shape"],
                reader_name="Reader",
                dynamic_shape=True,
                auto_reset=True,
                last_batch_policy=LastBatchPolicy.PARTIAL)
        else:
            self.dali_it = DALIGenericIterator(
                dali_pipelines, ["audio", "label", "audio_shape"],
                size=shard_size,
                dynamic_shape=True,
                auto_reset=True,
                last_batch_padded=True,
                last_batch_policy=LastBatchPolicy.PARTIAL)

        self.tokenize(transcripts)
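
The comment above assumes shard_size was rounded to a multiple of batch_size upstream. A hypothetical helper showing one way to do that (the name and signature are illustrative, not from the original repo):

def round_shard_size(dataset_size, num_shards, batch_size):
    # largest multiple of batch_size not exceeding the per-shard sample count
    samples_per_shard = dataset_size // num_shards
    return (samples_per_shard // batch_size) * batch_size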
Example #5
    def __init__(self, dali_pipelines, transcripts, symbols, batch_size,
                 reader_name, train_iterator: bool):
        self.transcripts = transcripts
        self.symbols = symbols
        self.batch_size = batch_size
        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        # in the train pipeline shard_size is set to be divisible by batch_size,
        # so no partial batches occur and the DROP policy is safe
        self.dali_it = DALIGenericIterator(
            dali_pipelines, ["audio", "label", "audio_shape"],
            reader_name=reader_name,
            dynamic_shape=True,
            auto_reset=True,
            last_batch_policy=LastBatchPolicy.DROP)
Example #6
    def __init__(self, dali_pipelines, transcripts, symbols, batch_size,
                 reader_name, train_iterator: bool):
        self.transcripts = transcripts
        self.symbols = symbols
        self.batch_size = batch_size
        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        self.dali_it = DALIGenericIterator(
            dali_pipelines, ["audio", "label", "audio_shape"],
            reader_name=reader_name,
            dynamic_shape=True,
            auto_reset=True,
            last_batch_policy=(LastBatchPolicy.DROP
                               if train_iterator else LastBatchPolicy.PARTIAL))
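
For context on the branch above: with DALI's LastBatchPolicy, DROP discards a final incomplete batch entirely, PARTIAL returns it with only the valid samples, and FILL tops it up with repeated samples, which is why training uses DROP here and evaluation uses PARTIAL.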
Example #7
import cv2 as cv
import numpy as np
import torch


def dali_makes_miracle():
    it = SegInputIterator(4, 0, 1)
    pipe = panopticPipeline(it, 4, 1, 0)
    train_loader = DALIGenericIterator(pipe, ["images", "anns"],
                                       fill_last_batch=True)
    for nbatch, data in enumerate(train_loader):
        images = data[0]["images"]
        anns = data[0]["anns"]
        break
    print(images)
    # Mark edge pixels by comparing each annotation pixel with its four
    # neighbours. Note: the original wrote `anns != (0 or neighbour)`, which
    # evaluates to `anns != neighbour` because `0 or x` returns `x`.
    edges = torch.zeros_like(anns)
    neighbour = torch.cat((anns[:, 1:], anns[:, -1:]), 1)
    edges += (anns != neighbour).type(torch.uint8)
    neighbour = torch.cat((anns[:, :1], anns[:, :-1]), 1)
    edges += (anns != neighbour).type(torch.uint8)
    neighbour = torch.cat((anns[:, :, :1], anns[:, :, :-1]), 2)
    edges += (anns != neighbour).type(torch.uint8)
    neighbour = torch.cat((anns[:, :, 1:], anns[:, :, -1:]), 2)
    edges += (anns != neighbour).type(torch.uint8)

    ann = anns.cpu().numpy()[3]
    edge = edges.cpu().numpy()[3]
    img = cv.cvtColor(images.cpu().numpy()[3], cv.COLOR_RGB2BGR)
    print(list(map(np.shape, [img, ann, edge])))
    edge = cv.cvtColor(edge, cv.COLOR_GRAY2BGR)
    ann = cv.cvtColor(ann, cv.COLOR_GRAY2BGR)

    edge[np.where(edge > 0)] = 255
    show = cv.hconcat([img, ann, edge])
    cv.imwrite('out.jpg', show)
Example #8
    def __init__(
        self,
        video_list: Path,
        shuffle: bool,
        device_id: int,
        batch_size: int,
        num_workers: int,
        clip_length_in_frames: int,
        initial_prefetch_size: int,
        seed: int,
    ):
        self.pipe = DaliVideoPipeline(
            device_id=device_id,
            shuffle=shuffle,
            file_list=video_list,
            batch_size=batch_size,
            num_threads=num_workers,
            initial_prefetch_size=initial_prefetch_size,
            clip_length_in_frames=clip_length_in_frames,
            seed=seed,
        )
        self.pipe.build()
        self.loader = DALIGenericIterator(
            pipelines=[self.pipe],
            output_map=["data", "label", "frame_idx", "start_time"],
            size=self.pipe.epoch_size("Reader"),
        )
Example #9
    def __init__(self, dali_pipelines, transcripts, symbols, batch_size,
                 shard_size, train_iterator: bool):
        self.transcripts = transcripts
        self.symbols = symbols
        self.batch_size = batch_size
        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        # in the train pipeline shard_size is set to be divisible by batch_size, so the PARTIAL policy is safe
        self.dali_it = DALIGenericIterator(
            dali_pipelines, ["audio", "label", "audio_shape"],
            size=shard_size,
            dynamic_shape=True,
            auto_reset=True,
            last_batch_padded=True,
            last_batch_policy=LastBatchPolicy.PARTIAL)
Example #10
def file_root_dali_iterator(batch_size, file_root, num_workers, do_shuffle,
                            the_seed, iterator_size, reset, device, num_frames,
                            channels):
    pipe = VideoPipeFileRoot(batch_size=batch_size,
                             file_root=file_root,
                             shuffle=do_shuffle,
                             initial_fill=batch_size,
                             num_threads=num_workers,
                             seed=the_seed,
                             device_id=device,
                             sequence_length=num_frames,
                             channels=channels)
    pipe.build()

    if iterator_size == 'all':
        it_size = pipe.epoch_size("Reader")
    else:
        it_size = iterator_size

    dali_iter = DALIGenericIterator([pipe], ['data', 'labels'],
                                    size=it_size,
                                    auto_reset=reset,
                                    fill_last_batch=True,
                                    last_batch_padded=False)

    return dali_iter
Example #11
def build_dali_pipeline(args, training=True, pipe=None):
    # pipe is prebuilt without touching the data
    train_loader = DALIGenericIterator(
        pipelines=[pipe],
        output_map=['image', 'bbox', 'label'],
        size=pipe.epoch_size()['train_reader'] // args.N_gpu,
        auto_reset=True)
    return train_loader, pipe.epoch_size()['train_reader']
Example #12
    def __init__(self, params, num_workers=1, device_id=0):
        self.pipe = DaliPipeline(params,
                                 num_threads=num_workers,
                                 device_id=device_id)
        self.pipe.build()
        self.length = params.Nsamples
        self.iterator = DALIGenericIterator([self.pipe], ['inp', 'tar'],
                                            self.length,
                                            auto_reset=True)
Example #13
def get_loader(flist, batch_size=512, device_id=0):
    pipe = ReidPipeline(flist,
                        batch_size=batch_size,
                        num_threads=8,
                        device_id=device_id)
    pipe.build()
    return DALIGenericIterator(pipe, ['images', 'labels', 'camids'],
                               size=pipe.size,
                               auto_reset=True)
Example #14
File: data.py  Project: yutiansut/DALI
def get_train_dali_loader(args, default_boxes, local_seed):
    train_pipe = create_coco_pipeline(default_boxes, args, seed=local_seed)

    train_loader = DALIGenericIterator(train_pipe,
                                       ["images", "boxes", "labels"],
                                       reader_name="Reader",
                                       last_batch_policy=LastBatchPolicy.FILL)

    return train_loader
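
With reader_name set, the iterator queries the reader's shard size itself, so no explicit size argument is needed. A minimal consumption sketch for the loader above (DALI iterators yield one dict per pipeline per step, keyed by output_map):

for nbatch, data in enumerate(train_loader):
    images = data[0]["images"]  # outputs of pipeline 0
    boxes = data[0]["boxes"]
    labels = data[0]["labels"]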
Example #15
def get_train_dali_loader(args, default_boxes, local_seed):
    train_pipe = COCOPipeline(default_boxes, args, seed=local_seed)

    train_loader = DALIGenericIterator(train_pipe,
                                       ["images", "boxes", "labels"],
                                       118287 // args.N_gpu,  # COCO train2017 has 118287 images; size must be an int
                                       stop_at_epoch=False)

    return train_loader
Example #16
File: data.py  Project: zxs789/DALI
def get_train_dali_loader(args, default_boxes, local_seed):
    train_pipe = COCOPipeline(default_boxes, args, seed=local_seed)

    train_loader = DALIGenericIterator(train_pipe,
                                       ["images", "boxes", "labels"],
                                       reader_name="Reader",
                                       fill_last_batch=True)

    return train_loader
Example #17
def load_data_dali(train_data_dir, train_batch_size):

    # put cropped train images (use utils/data.py) with size 256x256 in train_data_dir/imgfolder/
    pipe = dali.SimplePipeline(train_data_dir,
                               batch_size=train_batch_size,
                               num_threads=8,
                               device_id=0)
    pipe.build()
    train_loader = DALIGenericIterator(pipe, ['data'], size=6)
    return train_loader
Example #18
def get_iter_dali(event, batch_size, num_threads, local_rank=0, cutout=0):
    pip_train = HybridTrainPipe(event,
                                batch_size=batch_size,
                                num_threads=num_threads,
                                device_id=local_rank,
                                local_rank=local_rank,
                                cutout=cutout)
    pip_train.build()
    dali_iter_train = DALIGenericIterator(pip_train, ['inputs', 'target'])
    # dali_iter_train = pip_train.run()

    return dali_iter_train
Example #19
def get_train_dali_loader(args, default_boxes, local_seed):
    train_pipe = create_coco_pipeline(default_boxes,
                                      args,
                                      batch_size=args.batch_size,
                                      num_threads=args.num_workers,
                                      device_id=args.local_rank,
                                      seed=local_seed)

    train_loader = DALIGenericIterator(train_pipe,
                                       ["images", "boxes", "labels"],
                                       reader_name="Reader",
                                       last_batch_policy=LastBatchPolicy.FILL)

    return train_loader
Example #20
def create_dali_iters(batch_size, file_list, num_workers):

    train_pipe = VideoPipe(batch_size=batch_size,
                           file_list=file_list,
                           # filenames=file_names,
                           shuffle=False,
                           initial_fill=2 * batch_size,
                           num_threads=num_workers)
    train_pipe.build()

    train_dali_iter = DALIGenericIterator([train_pipe], ['data', 'labels'], train_pipe.epoch_size("Reader"), auto_reset=True)

    # From the DALI docs: with a dataset ``[1, 2, 3, 4, 5, 6, 7]`` and batch size 2,
    # fill_last_batch=True, last_batch_padded=False -> last batch = ``[7, 1]``,
    # and the next epoch's first iteration returns ``[2, 3]``

    return train_dali_iter
Example #21
def dataCUDA(path: list, opt):
    eii = ExternalInputIterator(batch_size=opt.batch_size,
                                root_folder=path,
                                height=opt.height)
    iterator = iter(eii)
    pipe = ExternalSourcePipeline(data_iterator=iterator,
                                  batch_size=opt.batch_size,
                                  num_threads=opt.num_workers,
                                  device_id=0)
    pipe.build()
    #     print("DALI INITIATED")
    data_iter = DALIGenericIterator([pipe], ['img', 'img_og'],
                                    dynamic_shape=True,
                                    size=len(path),
                                    auto_reset=False)
    return data_iter
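
ExternalInputIterator and ExternalSourcePipeline are defined elsewhere in this project. Below is a minimal, self-contained sketch of the iterator shape DALI's external source typically consumes (illustrative only, not the original implementation):

import numpy as np

class MinimalExternalIterator:
    def __init__(self, files, batch_size):
        self.files, self.batch_size, self.i = files, batch_size, 0

    def __iter__(self):
        self.i = 0
        return self

    def __next__(self):
        if self.i >= len(self.files):
            raise StopIteration
        batch = []
        for name in self.files[self.i:self.i + self.batch_size]:
            with open(name, "rb") as f:
                # feed encoded bytes; the pipeline decodes them on device
                batch.append(np.frombuffer(f.read(), dtype=np.uint8))
        self.i += self.batch_size
        return batch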
Example #22
class Lsun_Loader(object):
    def __init__(self, device_idx=1):
        self.batch_table = {
            4: 128,
            8: 128,
            16: 128,
            32: 64,
            64: 32,
            128: 16,
            256: 8
        }
        self.device_idx = device_idx
        #self.update(resolution_level=2)

    def update(self, resolution_level, batch_size):
        '''Rebuild the DALI pipeline for a new resolution level.

        :param resolution_level: integer in [2, 8]; the image size is
            2 ** resolution_level (2->4, 3->8, 4->16, 5->32, 6->64,
            7->128, 8->256)
        :param batch_size: batch size passed to the pipeline
        :return: None
        '''
        assert 2 <= resolution_level <= 8, "res error"
        self.batchsize = int(self.batch_table[pow(2, resolution_level)])
        self.pipeline = TFRecordPipeline(
            batch_size=batch_size,
            size=pow(2, resolution_level),
            num_threads=4,
            device_id=self.device_idx,
            path="/home/guyuchao/ssd/dataset/lsun-master/lsun_torch_tfrecord2/-r%02d.tfrecords"
                 % resolution_level,
            index_path="/home/guyuchao/ssd/dataset/lsun-master/lsun_torch_tfrecord2/-r%02d.idx"
                 % resolution_level)
        self.pipeline.build()

        self.dali_iter = DALIGenericIterator(
            [self.pipeline], ["image/encoded"],
            self.pipeline.epoch_size("Reader"),
            auto_reset=True)

    def get_batch(self):
        #self.dali_iter.reset()
        return self.dali_iter.next()[0]["image/encoded"]
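
A hypothetical usage sketch for the class above (the resolution level and batch size here are illustrative, not from the original repo):

loader = Lsun_Loader(device_idx=0)
loader.update(resolution_level=3, batch_size=128)  # 8x8 images
batch = loader.get_batch()                         # encoded-image tensor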
Example #23
def get_loader(args, phase):
    file_list = args['train_video_file_list'] if phase == 'train' else None
    num_gpus = args['num_gpus']
    batch_size_per_gpu = int(args['tem_batch_size'] / num_gpus)
    num_threads_per_gpu = max(int(args['data_workers'] / num_gpus), 2)
    pipes = [
        ActivityNetVideoPipe(args,
                             file_list,
                             batch_size=batch_size_per_gpu,
                             num_threads=num_threads_per_gpu,
                             device_id=device_id)
        for device_id in range(1)  # single GPU for now; originally range(num_gpus)
    ]
    pipes[0].build()
    epoch_size = pipes[0].epoch_size("Reader")
    dali_iter = DALIGenericIterator(pipes, ['data', 'label'], epoch_size)
    return dali_iter, epoch_size
Example #24
def get_dataloader(pipiter, pipeline, output_map, cfg, is_train, device_id=0):

    if is_train:
        root_dir = cfg.train_datasets_bpath
    else:
        root_dir = cfg.test_datasets_bpath
    now_pipeline = pipeline(cfg,
                            root_dir,
                            cfg.batch_size,
                            cfg.worker_numbers,
                            device_id=device_id,
                            is_train=is_train)
    dataloader = DALIGenericIterator(now_pipeline,
                                     output_map,
                                     now_pipeline.dataset.n,
                                     auto_reset=True)
    return dataloader
Example #25
    def make_loader(self):
        print(f" ==>> 使用训练集{self.args['tr_data_path']}训练 <<== ")
        train_pipe = ImagePipeline(imageset_dir=self.args['tr_data_path'],
                                   image_size=self.args["input_size"],
                                   random_shuffle=True,
                                   batch_size=self.args["batch_size"])
        train_loader = DALIGenericIterator(pipelines=train_pipe,
                                           output_map=["images", "masks"],
                                           size=train_pipe.epoch_size(),
                                           auto_reset=True,
                                           fill_last_batch=False,
                                           last_batch_padded=False)

        if self.args['val_data_path'] is not None:
            print(f" ==>> Validating with validation set {self.args['val_data_path']} <<== ")
            val_set = ImageFolder(self.args['val_data_path'],
                                  mode="test",
                                  in_size=self.args["input_size"],
                                  prefix=self.args['prefix'])
            val_loader = DataLoaderX(val_set,
                                     batch_size=self.args["batch_size"],
                                     num_workers=self.args["num_workers"],
                                     shuffle=False,
                                     drop_last=False,
                                     pin_memory=True)
        else:
            print(" ==>> 不使用验证集验证 <<== ")
            val_loader = None

        if self.args['te_data_path'] is not None:
            print(f" ==>> Testing with test set {self.args['te_data_path']} <<== ")
            test_set = ImageFolder(self.args['te_data_path'],
                                   mode="test",
                                   in_size=self.args["input_size"],
                                   prefix=self.args['prefix'])
            test_loader = DataLoaderX(test_set,
                                      batch_size=self.args["batch_size"],
                                      num_workers=self.args["num_workers"],
                                      shuffle=False,
                                      drop_last=False,
                                      pin_memory=True)
        else:
            print(f" ==>> 不使用测试集测试 <<== ")
            test_loader = None
        return train_loader, test_loader, val_loader
Example #26
def get_train_dali_loader(default_boxes, root, annFile, batch_size, mean, std,
                          local_rank, num_workers, ngpus, local_seed):
    train_pipe = COCOPipeline(default_boxes,
                              root,
                              annFile,
                              batch_size,
                              mean,
                              std,
                              local_rank,
                              num_workers,
                              seed=local_seed)

    train_loader = DALIGenericIterator(train_pipe,
                                       ["images", "boxes", "labels"],
                                       118287 // ngpus,  # COCO train2017 has 118287 images; size must be an int
                                       auto_reset=False)

    return train_loader