Example #1
def test_pytorch_iterator_not_fill_last_batch_pad_last_batch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    num_gpus = 1
    batch_size = 100

    pipes, data_size = create_pipeline(
        lambda gpu: COCOReaderPipeline(batch_size=batch_size,
                                       num_threads=4,
                                       shard_id=gpu,
                                       num_gpus=num_gpus,
                                       data_paths=data_sets[0],
                                       random_shuffle=False,
                                       stick_to_shard=False,
                                       shuffle_after_epoch=False,
                                       pad_last_batch=True), batch_size,
        num_gpus)

    dali_train_iter = PyTorchIterator(pipes,
                                      output_map=["data"],
                                      size=pipes[0].epoch_size("Reader"),
                                      fill_last_batch=False,
                                      last_batch_padded=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(img_ids_list) == data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) != 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    # no mirroring here: with fill_last_batch=False the last batch is simply cut off,
    # so mirrored_data holds distinct real samples
    assert len(next_img_ids_list) == data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) != 1
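
The gather_ids helper used in this and the next example comes from DALI's test utilities and is not part of the listing; a rough, hypothetical sketch of the behaviour the asserts above rely on (names, details, and the two trailing return values are assumptions) could look like this:

import math
import numpy as np

def gather_ids_sketch(dali_iter, data_getter, pad_getter, data_size):
    # Drain the iterator once, concatenating the ids from every batch.
    img_ids_list = []
    pad = 0
    for it in iter(dali_iter):
        img_ids_list.append(data_getter(it[0]).copy())
        pad += pad_getter(it[0])
    img_ids_list = np.concatenate(img_ids_list)

    # The tail that would have to be padded to fill the final batch: if the
    # iterator pads, these entries are all copies of one sample (set size 1);
    # if the last batch is simply cut short, they are distinct real samples.
    batch_size = dali_iter.batch_size
    remainder = int(math.ceil(data_size / batch_size)) * batch_size - data_size
    mirrored_data = img_ids_list[-remainder - 1:]

    return img_ids_list, set(img_ids_list), mirrored_data, pad, remainder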
Example #2
def test_pytorch_iterator_last_batch_pad_last_batch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    num_gpus = 1
    batch_size = 100
    iters = 0

    pipes, data_size = create_pipeline(
        lambda gpu: COCOReaderPipeline(batch_size=batch_size,
                                       num_threads=4,
                                       shard_id=gpu,
                                       num_gpus=num_gpus,
                                       data_paths=data_sets[0],
                                       random_shuffle=True,
                                       stick_to_shard=False,
                                       shuffle_after_epoch=False,
                                       pad_last_batch=True), batch_size,
        num_gpus)

    dali_train_iter = PyTorchIterator(pipes,
                                      output_map=["data"],
                                      size=pipes[0].epoch_size("Reader"),
                                      fill_last_batch=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(img_ids_list) > data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) == 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(next_img_ids_list) > data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) == 1
Example #3
    eii = CXRImageIterator(batch_size,
                           0,
                           1,
                           train_ids,
                           train_labels,
                           args.image_dir,
                           image_format=args.image_format,
                           encoding=args.label_encoding)
    pipe = CXRImagePipeline(batch_size=batch_size,
                            num_threads=2,
                            device_id=0,
                            num_gpus=4,
                            external_data=eii)
    pii = PyTorchIterator(pipe,
                          size=eii.size,
                          last_batch_padded=True,
                          fill_last_batch=False)

    times = []
    for e in range(1):
        for i, data in enumerate(pii):
            if i == 100: break
            if i != 0:
                stop = time()
                times.append(stop - start)
            print("epoch: {}, iter {}, real batch size: {}, data shape: {}".
                  format(e, i, len(data[0]["data"]), data[0]['data'].shape))
            img = data[0]['data'].squeeze().cpu().numpy()
            print(np.max(img))
            print(np.min(img))
            print(img.shape)
            start = time()  # timestamp the end of this iteration so the next fetch can be timed
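
CXRImageIterator and CXRImagePipeline are project-specific classes not shown in this snippet; a minimal external-data iterator of the kind a DALI ExternalSource-fed pipeline consumes might look roughly like this (the class name, argument meaning, and file layout are assumptions, and label handling is omitted for brevity):

import numpy as np

class CXRImageIteratorSketch:
    # Hypothetical stand-in: serves one batch of raw encoded images per __next__
    # call, which is the contract an ExternalSource-fed pipeline expects.
    def __init__(self, batch_size, shard_id, num_shards, ids, image_dir):
        self.batch_size = batch_size
        files = [f"{image_dir}/{i}" for i in ids]           # assumed file layout
        per_shard = len(files) // num_shards                # keep only this shard
        self.files = files[shard_id * per_shard:(shard_id + 1) * per_shard]
        self.size = len(self.files)                         # used as size=eii.size above

    def __iter__(self):
        self.i = 0
        return self

    def __next__(self):
        if self.i >= self.size:
            raise StopIteration
        batch = []
        for _ in range(self.batch_size):
            # read the raw file bytes; decoding is left to the pipeline
            with open(self.files[self.i % self.size], "rb") as f:
                batch.append(np.frombuffer(f.read(), dtype=np.uint8))
            self.i += 1
        return batch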
Example #4
def test_stop_iteration_pytorch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    test_stop_iteration(
        lambda pipe, size, auto_reset: PyTorchIterator(
            pipe, output_map=["data"], size=size, auto_reset=auto_reset),
        "PyTorchIterator")
Example #5
def test_stop_iteration_pytorch_fail_single():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    fw_iter = lambda pipe, size, auto_reset: PyTorchIterator(
        pipe, output_map=["data"], size=size, auto_reset=auto_reset)
    check_stop_iter_fail_single(fw_iter)
Example #6
def test_stop_iteration_pytorch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    fw_iter = lambda pipe, size, auto_reset: PyTorchIterator(
        pipe, output_map=["data"], size=size, auto_reset=auto_reset)
    iter_name = "PyTorchIterator"
    for batch_size, epochs, iter_num, auto_reset, infinite in stop_teration_case_generator():
        yield check_stop_iter, fw_iter, iter_name, batch_size, epochs, iter_num, auto_reset, infinite
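
check_stop_iter and stop_teration_case_generator also belong to DALI's test helpers and are not shown here. Purely as an illustration (not the test suite's actual code), a factory like fw_iter could be exercised against a trivial pipeline as follows; the pipeline contents are assumptions:

from nvidia.dali import pipeline_def, fn
from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator

batch_size = 4

@pipeline_def(batch_size=batch_size, num_threads=1, device_id=0)
def dummy_pipe():
    # a constant per-sample tensor is enough to give the iterator a "data" output
    return fn.constant(idata=[1], shape=[1])

pipe = dummy_pipe()
pipe.build()

fw_iter = lambda pipe, size, auto_reset: PyTorchIterator(
    pipe, output_map=["data"], size=size, auto_reset=auto_reset)

it = fw_iter(pipe, size=2 * batch_size, auto_reset=False)
seen = sum(len(batch[0]["data"]) for batch in it)
assert seen == 2 * batch_size  # iteration stops once `size` samples have been returned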