Exemple #1
0
def _test_permute_dataset(batch_size, epoch_size, trailing_samples, cb,
                          py_num_workers, prefetch_queue_depth,
                          reader_queue_depth):
    """Check that every epoch visits ``epoch_size * batch_size`` distinct samples.

    A parallel, per-sample (``batch=False``) pipeline is created from ``cb``
    and run for three epochs. In each epoch the first element of every
    returned sample is used as an index into a "seen" table; after
    ``epoch_size`` iterations the number of marked entries must equal
    ``epoch_size * batch_size``. One more ``run()`` must then raise
    ``StopIteration``, after which the pipeline is reset.

    Args:
        batch_size: Batch size of the pipeline.
        epoch_size: Number of full batches expected per epoch.
        trailing_samples: Extra slots reserved in the "seen" table on top of
            ``epoch_size * batch_size``.
        cb: Source handed to ``utils.create_pipe``.
        py_num_workers: Number of Python workers for the pipeline.
        prefetch_queue_depth: Forwarded to ``utils.create_pipe``.
        reader_queue_depth: Forwarded to ``utils.create_pipe``.
    """
    num_epochs = 3
    full_batches_samples = epoch_size * batch_size
    pipe = utils.create_pipe(
        cb,
        "cpu",
        batch_size=batch_size,
        py_num_workers=py_num_workers,
        py_start_method="spawn",
        parallel=True,
        device_id=0,
        batch=False,
        num_threads=1,
        cycle=None,
        prefetch_queue_depth=prefetch_queue_depth,
        reader_queue_depth=reader_queue_depth)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    for epoch_idx in range(num_epochs):
        # One flag per potential sample index, including the trailing ones.
        seen = [False] * (full_batches_samples + trailing_samples)
        for _ in range(epoch_size):
            (batch, ) = pipe.run()
            assert len(batch) == batch_size
            for sample in batch:
                seen[np.array(sample)[0]] = True
        assert sum(seen) == full_batches_samples, \
            "Epoch number {} did not contain some samples from data set".format(epoch_idx)
        # The epoch must be exhausted now: the next run has to signal it.
        try:
            pipe.run()
        except StopIteration:
            pipe.reset()
            continue
        assert False, "expected StopIteration"
Exemple #2
0
def _test_large_sample(start_method):
    """Run a parallel source of large samples and verify the reduced outputs.

    The pipeline wraps ``large_sample_cb`` in a parallel, per-sample external
    source and sums each sample over axes ``(1, 2)``. Eight batches of two
    samples are run; every reduced sample must have shape ``(512,)`` and all
    of its elements must equal ``idx_in_epoch * 1024 * 1024``.

    Args:
        start_method: Value passed as ``py_start_method`` to the pipeline.
    """
    batch_size = 2
    num_batches = 8
    reduced_shape = (512, )

    @pipeline_def
    def create_pipeline():
        large = fn.external_source(large_sample_cb,
                                   batch=False,
                                   parallel=True,
                                   prefetch_queue_depth=1)
        # iteration over array in Python is too slow, so reduce the number of elements
        # to iterate over
        return fn.reductions.sum(large, axes=(1, 2))

    pipe = create_pipeline(batch_size=batch_size,
                           py_num_workers=2,
                           py_start_method=start_method,
                           prefetch_queue_depth=1,
                           num_threads=2,
                           device_id=0)
    pipe.build()
    capture_processes(pipe._py_pool)
    for batch_idx in range(num_batches):
        (out, ) = pipe.run()
        first_in_batch = batch_size * batch_idx
        for idx_in_batch in range(batch_size):
            expected_val = (first_in_batch + idx_in_batch) * 1024 * 1024
            a = np.array(out[idx_in_batch])
            assert a.shape == reduced_shape, \
                "Expected shape (512,) but got {}".format(a.shape)
            for val in a.flat:
                assert val == expected_val, (
                    f"Unexpected value in batch: got {val}, expected {expected_val}, "
                    f"for batch {batch_idx}, sample {idx_in_batch}")
Exemple #3
0
def test_discard():
    """Compare a parallel and a sequential source across epoch resets.

    Three external sources are combined in one pipeline: a three-batch list
    with ``cycle='raise'``, and ``ext_cb2`` once in parallel and once in
    sequential mode. Up to ten runs are attempted; on ``StopIteration`` the
    local counters are zeroed and the pipeline is reset. For every sample both
    ``ext_cb2`` outputs must equal
    ``[sample_in_epoch, index_in_batch, iteration]``.
    """
    bs = 5
    attempts = 10
    pipe = dali.Pipeline(batch_size=bs,
                         device_id=None,
                         num_threads=5,
                         py_num_workers=4,
                         py_start_method='spawn')
    with pipe:
        # Three identical batches holding the values 0 .. bs - 1.
        ext1 = dali.fn.external_source([[np.float32(i)
                                         for i in range(bs)]] * 3,
                                       cycle='raise')
        ext2 = dali.fn.external_source(ext_cb2, batch=False, parallel=True)
        ext3 = dali.fn.external_source(ext_cb2, batch=False, parallel=False)
        pipe.set_outputs(ext1, ext2, ext3)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    sample_in_epoch = 0
    iteration = 0
    for _ in range(attempts):
        try:
            e1, e2, e3 = pipe.run()
            for idx_in_batch in range(bs):
                assert e1.at(idx_in_batch) == idx_in_batch
                expected = np.array(
                    [sample_in_epoch, idx_in_batch, iteration])
                assert np.array_equal(e2.at(idx_in_batch), expected)
                assert np.array_equal(
                    e3.at(idx_in_batch),
                    np.array([sample_in_epoch, idx_in_batch, iteration]))
                sample_in_epoch += 1
            iteration += 1
        except StopIteration:
            # End of epoch: start counting from scratch again.
            sample_in_epoch = 0
            iteration = 0
            pipe.reset()
Exemple #4
0
def _test_epoch_idx(batch_size, epoch_size, cb, py_num_workers,
                    prefetch_queue_depth, reader_queue_depth, batch_mode,
                    batch_info):
    """Verify the sample/batch/epoch indices reported through the source.

    A parallel pipeline built from ``cb`` is run for three epochs of
    ``epoch_size`` iterations. Each returned sample must equal
    ``[idx_in_epoch, idx_in_batch, iteration, epoch]``, where the last entry
    is the current epoch index when ``batch_mode`` is false or ``batch_info``
    is true, and ``0`` otherwise. After each epoch one extra ``run()`` must
    raise ``StopIteration`` and the pipeline is reset.

    Args:
        batch_size: Batch size of the pipeline.
        epoch_size: Number of iterations per epoch.
        cb: Source handed to ``utils.create_pipe``.
        py_num_workers: Number of Python workers for the pipeline.
        prefetch_queue_depth: Forwarded to ``utils.create_pipe``.
        reader_queue_depth: Forwarded to ``utils.create_pipe``.
        batch_mode: Passed as ``batch`` to ``utils.create_pipe``.
        batch_info: Passed as ``batch_info`` to ``utils.create_pipe``.
    """
    num_epochs = 3
    pipe = utils.create_pipe(
        cb,
        "cpu",
        batch_size=batch_size,
        py_num_workers=py_num_workers,
        py_start_method="spawn",
        parallel=True,
        device_id=0,
        batch=batch_mode,
        num_threads=1,
        cycle=None,
        batch_info=batch_info,
        prefetch_queue_depth=prefetch_queue_depth,
        reader_queue_depth=reader_queue_depth)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    # The epoch index is only expected in per-sample mode or with batch_info.
    epoch_is_reported = batch_info or not batch_mode
    for epoch_idx in range(num_epochs):
        expected_epoch = epoch_idx if epoch_is_reported else 0
        for iteration in range(epoch_size):
            (batch, ) = pipe.run()
            assert len(batch) == batch_size
            first_in_batch = iteration * batch_size
            for sample_i, sample in enumerate(batch):
                expected = np.array([
                    first_in_batch + sample_i, sample_i, iteration,
                    expected_epoch
                ])
                np.testing.assert_array_equal(sample, expected)
        # The epoch must be over now.
        try:
            pipe.run()
        except StopIteration:
            pipe.reset()
            continue
        assert False, "expected StopIteration"
Exemple #5
0
def _test_cycle_quiet_non_resetable(iterable, reader_queue_size, batch_size,
                                    epoch_size):
    """Check that a source that cannot be restarted stays exhausted after reset.

    A parallel batch pipeline with ``cycle="quiet"`` is run ``epoch_size``
    times. The next ``run()`` must raise ``StopIteration``; after
    ``pipe.reset()`` a further ``run()`` must raise ``StopIteration`` again.

    Args:
        iterable: Source handed to ``utils.create_pipe``.
        reader_queue_size: Passed as ``reader_queue_depth``.
        batch_size: Batch size of the pipeline.
        epoch_size: Number of successful runs expected before exhaustion.
    """
    pipe = utils.create_pipe(
        iterable,
        "cpu",
        batch_size=batch_size,
        py_num_workers=1,
        py_start_method="spawn",
        parallel=True,
        device_id=None,
        batch=True,
        num_threads=5,
        cycle="quiet",
        reader_queue_depth=reader_queue_size)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    # Consume the whole epoch; the outputs themselves are not inspected.
    for _ in range(epoch_size):
        pipe.run()
    try:
        pipe.run()
    except StopIteration:
        pipe.reset()
        # Even after the reset there must be nothing left to produce.
        try:
            pipe.run()
        except StopIteration:
            pass
        else:
            assert False, "Expected stop iteration"
    else:
        assert False, "Expected stop iteration at the end of the epoch"
Exemple #6
0
def _test_cycle_multiple_iterators(batch_size, iters_num, py_num_workers,
                                   reader_queue_sizes, cycle_policies,
                                   epoch_sizes):
    """Compare parallel and sequential pipelines mixing three sources.

    Both pipelines contain a per-sample callback source and two batch
    iterator sources with individually chosen ``cycle`` policies. The results
    of ``collect_iterations`` for each pipeline are compared entry by entry:
    a ``StopIteration`` marker in one must be matched in the other, otherwise
    all three outputs must agree sample by sample.

    Args:
        batch_size: Batch size of both pipelines.
        iters_num: Passed to ``collect_iterations`` for each pipeline.
        py_num_workers: Number of Python workers for the pipelines.
        reader_queue_sizes: Three ``prefetch_queue_depth`` values used by the
            parallel pipeline (the sequential one uses ``None``).
        cycle_policies: ``cycle`` values for the two iterator sources.
        epoch_sizes: Epoch sizes of the callback and the two iterators.
    """
    @dali.pipeline_def(batch_size=batch_size,
                       num_threads=4,
                       device_id=None,
                       py_num_workers=py_num_workers,
                       py_start_method='spawn')
    def pipeline(sample_cb, iter_1, iter_2, parallel):
        # Queue depths are only specified for the parallel variant.
        queue_size_0, queue_size_1, queue_size_2 = (
            reader_queue_sizes if parallel else (None, None, None))
        cycle_1, cycle_2 = cycle_policies
        sample_out = dali.fn.external_source(
            source=sample_cb,
            parallel=parallel,
            batch=False,
            prefetch_queue_depth=queue_size_0)
        iter1_out = dali.fn.external_source(
            source=iter_1,
            parallel=parallel,
            batch=True,
            prefetch_queue_depth=queue_size_1,
            cycle=cycle_1)
        iter2_out = dali.fn.external_source(
            source=iter_2,
            parallel=parallel,
            batch=True,
            prefetch_queue_depth=queue_size_2,
            cycle=cycle_2)
        return (sample_out, iter1_out, iter2_out)

    shape = (2, 3)
    sample_epoch_size, iter_1_epoch_size, iter_2_epoch_size = epoch_sizes
    sample_cb = utils.ExtCallback((4, 5), sample_epoch_size * batch_size,
                                  np.int32)
    iter_1 = Iterable(batch_size,
                      shape,
                      epoch_size=iter_1_epoch_size,
                      dtype=np.int32)
    iter_2 = Iterable(batch_size,
                      shape,
                      epoch_size=iter_2_epoch_size,
                      dtype=np.int32)
    pipe_parallel = pipeline(sample_cb, iter_1, iter_2, parallel=True)
    pipe_seq = pipeline(sample_cb, iter_1, iter_2, parallel=False)
    pipe_parallel.build()
    utils.capture_processes(pipe_parallel._py_pool)
    pipe_seq.build()
    parallel_outs = collect_iterations(pipe_parallel, iters_num)
    seq_outs = collect_iterations(pipe_seq, iters_num)
    assert len(parallel_outs) == len(seq_outs)
    for parallel_out, seq_out in zip(parallel_outs, seq_outs):
        # An end-of-epoch marker must show up in both runs at the same spot.
        if parallel_out == StopIteration or seq_out == StopIteration:
            assert parallel_out == seq_out
            continue
        assert len(parallel_out) == len(seq_out) == 3
        for batch_parallel, batch_seq in zip(parallel_out, seq_out):
            assert len(batch_parallel) == len(batch_seq) == batch_size
            for sample_parallel, sample_seq in zip(batch_parallel, batch_seq):
                got = np.array(sample_parallel)
                reference = np.array(sample_seq)
                np.testing.assert_equal(got, reference)
Exemple #7
0
def _test_cycle_quiet(cb, is_gen_fun, batch_size, epoch_size,
                      reader_queue_size):
    """Check that ``cycle="quiet"`` restarts the source without raising.

    The pipeline is run for ``3 * epoch_size + 1`` iterations and compared
    batch by batch against a reference iterator that is recreated every
    ``epoch_size`` iterations.

    Args:
        cb: Source handed to ``utils.create_pipe``; also used to build the
            reference iterator.
        is_gen_fun: When true the reference is obtained with ``cb()``,
            otherwise with ``iter(cb)``.
        batch_size: Batch size of the pipeline.
        epoch_size: Number of iterations after which the reference restarts.
        reader_queue_size: Passed as ``reader_queue_depth``.
    """
    pipe = utils.create_pipe(
        cb,
        "cpu",
        batch_size=batch_size,
        py_num_workers=1,
        py_start_method="spawn",
        parallel=True,
        device_id=None,
        batch=True,
        num_threads=5,
        cycle="quiet",
        reader_queue_depth=reader_queue_size)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    total_iters = 3 * epoch_size + 1
    for i in range(total_iters):
        # Start of a new epoch (including the very first one): fresh reference.
        if i % epoch_size == 0:
            refer_iter = cb() if is_gen_fun else iter(cb)
        (batch, ) = pipe.run()
        expected_batch = next(refer_iter)
        assert len(batch) == len(expected_batch), \
            f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}"
        for sample, expected_sample in zip(batch, expected_batch):
            np.testing.assert_equal(sample, expected_sample)
Exemple #8
0
def test_parallel_fork_cpu_only():
    """Build pairs of CPU-only, fork-started parallel pipelines and compare them.

    Four pairs of identically configured pipelines share one callback. Each
    pair is built, its worker pools are registered with
    ``utils.capture_processes``, and the two pipelines are compared with
    ``utils.compare_pipelines`` over 40 iterations.
    """
    pipeline_pairs = 4
    batch_size = 10
    iters = 40
    callback = utils.ExtCallback((4, 5), iters * batch_size, np.int32)

    def make_pipe():
        # Every pipeline in the test uses exactly this configuration.
        return utils.create_pipe(callback,
                                 'cpu',
                                 batch_size,
                                 py_num_workers=4,
                                 py_start_method='fork',
                                 parallel=True,
                                 device_id=None)

    # All pipelines are created up front, before any of them is built.
    parallel_pipes = [(make_pipe(), make_pipe())
                      for _ in range(pipeline_pairs)]
    for pipe0, pipe1 in parallel_pipes:
        pipe0.build()
        pipe1.build()
        utils.capture_processes(pipe0._py_pool)
        utils.capture_processes(pipe1._py_pool)
        utils.compare_pipelines(pipe0, pipe1, batch_size, iters)
Exemple #9
0
def _test_all_kinds_parallel(sample_cb, batch_cb, iter_cb, batch_size,
                             py_num_workers, reader_queue_sizes, num_iters):
    """Run sample, batch and iterator sources in parallel and compare them.

    One pipeline holds three parallel external sources: a per-sample
    callback, a batch callback with ``batch_info=True`` and a batch iterator
    with ``cycle="raise"``. For three epochs the pipeline is run until
    ``StopIteration``; every iteration the three outputs must have the same
    length and equal samples, and each epoch must last exactly ``num_iters``
    iterations.

    Args:
        sample_cb: Source of the per-sample external source.
        batch_cb: Source of the batch external source.
        iter_cb: Source of the iterator external source.
        batch_size: Batch size of the pipeline.
        py_num_workers: Number of Python workers for the pipeline.
        reader_queue_sizes: Three ``prefetch_queue_depth`` values, one per
            source, in the order sample, batch, iterator.
        num_iters: Expected number of iterations per epoch.
    """
    @dali.pipeline_def(batch_size=batch_size,
                       num_threads=4,
                       device_id=None,
                       py_num_workers=py_num_workers,
                       py_start_method='spawn')
    def pipeline():
        sample_depth, batch_depth, iter_depth = reader_queue_sizes
        from_samples = dali.fn.external_source(
            source=sample_cb,
            parallel=True,
            batch=False,
            prefetch_queue_depth=sample_depth)
        from_batches = dali.fn.external_source(
            source=batch_cb,
            parallel=True,
            batch=True,
            prefetch_queue_depth=batch_depth,
            batch_info=True)
        from_iter = dali.fn.external_source(
            source=iter_cb,
            parallel=True,
            batch=True,
            prefetch_queue_depth=iter_depth,
            cycle="raise")
        return (from_samples, from_batches, from_iter)

    num_epochs = 3
    pipe = pipeline()
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    for _ in range(num_epochs):
        # Number of successful iterations in the current epoch.
        i = 0
        while True:
            try:
                (sample_outs, batch_outs, iter_outs) = pipe.run()
                assert len(sample_outs) == len(batch_outs), \
                    f"Batch length mismatch: sample: {len(sample_outs)}, batch: {len(batch_outs)}"
                assert len(batch_outs) == len(iter_outs), \
                    f"Batch length mismatch: batch: {len(batch_outs)}, iter: {len(iter_outs)}"
                outputs = zip(sample_outs, batch_outs, iter_outs)
                for sample_out, batch_out, iter_out in outputs:
                    np.testing.assert_equal(np.array(sample_out),
                                            np.array(batch_out))
                    np.testing.assert_equal(np.array(batch_out),
                                            np.array(iter_out))
                i += 1
            except StopIteration:
                pipe.reset()
                assert i == num_iters, \
                    f"Number of iterations mismatch: expected {num_iters}, got {i}"
                break
Exemple #10
0
def _test_vs_non_parallel(batch_size, cb_parallel, cb_seq, batch,
                          py_num_workers):
    """Compare a parallel external source against a sequential one.

    A single pipeline outputs two external sources, one with
    ``parallel=False`` fed by ``cb_seq`` and one with ``parallel=True`` fed by
    ``cb_parallel``. The pipeline is run ten times and the two outputs must be
    equal sample by sample.

    Args:
        batch_size: Batch size of the pipeline.
        cb_parallel: Source for the ``parallel=True`` external source.
        cb_seq: Source for the ``parallel=False`` external source.
        batch: Passed as ``batch`` to both external sources.
        py_num_workers: Number of Python workers for the pipeline.
    """
    pipe = dali.Pipeline(batch_size=batch_size,
                         device_id=None,
                         num_threads=5,
                         py_num_workers=py_num_workers,
                         py_start_method='spawn')
    with pipe:
        # Each callback goes to the source its name announces; they used to
        # be wired crosswise (cb_parallel -> sequential, cb_seq -> parallel).
        ext_seq = dali.fn.external_source(cb_seq,
                                          batch=batch,
                                          parallel=False)
        ext_par = dali.fn.external_source(cb_parallel,
                                          batch=batch,
                                          parallel=True)
        pipe.set_outputs(ext_seq, ext_par)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    for _ in range(10):
        seq, par = pipe.run()
        for j in range(batch_size):
            s = seq.at(j)
            p = par.at(j)
            assert np.array_equal(s, p)
Exemple #11
0
def _test_cycle_raise(cb, is_gen_fun, batch_size, epoch_size,
                      reader_queue_size):
    """Check that ``cycle="raise"`` ends every epoch with ``StopIteration``.

    A parallel batch pipeline is run for three epochs. Within an epoch each
    returned batch is compared against the next batch of a reference
    iterator; when ``StopIteration`` is raised the pipeline is reset, the
    reference is recreated and the number of completed iterations must equal
    ``epoch_size``.

    Args:
        cb: Source handed to ``utils.create_pipe``; also used to build the
            reference iterator.
        is_gen_fun: When true the reference is obtained with ``cb()``,
            otherwise with ``iter(cb)``.
        batch_size: Batch size of the pipeline.
        epoch_size: Expected number of iterations per epoch.
        reader_queue_size: Passed as ``reader_queue_depth``.
    """
    def fresh_reference():
        # The first epoch used to take ``cb`` itself instead of ``iter(cb)``,
        # which breaks for iterables that do not implement ``__next__`` and
        # was inconsistent with how the reference is rebuilt after a reset.
        return cb() if is_gen_fun else iter(cb)

    pipe = utils.create_pipe(cb,
                             "cpu",
                             batch_size=batch_size,
                             py_num_workers=1,
                             py_start_method="spawn",
                             parallel=True,
                             device_id=None,
                             batch=True,
                             num_threads=5,
                             cycle="raise",
                             reader_queue_depth=reader_queue_size)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    refer_iter = fresh_reference()
    for _ in range(3):
        # Number of successful iterations in the current epoch.
        i = 0
        while True:
            try:
                (batch, ) = pipe.run()
                expected_batch = next(refer_iter)
                assert len(batch) == len(expected_batch), \
                    f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}"
                for sample, expected_sample in zip(batch, expected_batch):
                    np.testing.assert_equal(sample, expected_sample)
                i += 1
            except StopIteration:
                pipe.reset()
                refer_iter = fresh_reference()
                assert i == epoch_size, \
                    f"Number of iterations mismatch: expected {epoch_size}, got {i}"
                break