def test_basics_input_indexes():
    """
    Test that input_indexs defaults to an empty tuple and, once assigned,
    is propagated through downstream shuffle/project operations.
    """
    logger.info("test_basics_input_indexes")

    first = ds.NumpySlicesDataset([1, 2, 3], column_names=["col_1"])
    assert first.input_indexs == ()
    first.input_indexs = 10
    assert first.input_indexs == 10
    # The assigned value survives shuffle and project.
    first = first.shuffle(2)
    assert first.input_indexs == 10
    first = first.project(["col_1"])
    assert first.input_indexs == 10

    second = ds.NumpySlicesDataset([1, 2, 3], column_names=["col_1"])
    assert second.input_indexs == ()
    # Unassigned, the attribute stays () through the same operations.
    second = second.shuffle(2)
    assert second.input_indexs == ()
    second = second.project(["col_1"])
    assert second.input_indexs == ()
    second.input_indexs = 20
    assert second.input_indexs == 20

    # Concatenation keeps the left operand's input indexes.
    combined = first + second
    assert combined.input_indexs == 10
Exemple #2
0
def test_callbacks_validations():
    """Verify invalid `callbacks` arguments to map() are rejected with clear errors."""
    logger.info("test_callbacks_validations")

    # A scalar that is not a callback object is rejected at map() time.
    with pytest.raises(Exception) as err:
        dataset = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
        dataset.map(operations=(lambda x: x), callbacks=0)
    assert "Argument callbacks with value 0 is not " in str(err.value)

    # A list mixing a valid callback with an invalid entry is rejected too,
    # and the error names the offending index.
    with pytest.raises(Exception) as err:
        good_cb = MyDSCallback()
        dataset = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
        dataset.map(operations=(lambda x: x), callbacks=[good_cb, 0])
    assert "Argument callbacks[1] with value 0 is not " in str(err.value)

    # A DSCallback subclass that overrides none of the hooks only fails once
    # the pipeline is actually iterated.
    with pytest.raises(Exception) as err:

        class NoOverrideCB(DSCallback):
            pass

        empty_cb = NoOverrideCB()

        dataset = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
        dataset = dataset.map(operations=(lambda x: x), callbacks=empty_cb)
        for _ in dataset:
            pass
    assert "Provided Callback class did not override any of the 6 callback methods." in str(
        err.value)
Exemple #3
0
def test_numpy_slices_invalid_column_names_type():
    """Non-string entries in column_names must raise a TypeError."""
    logger.info("Test incorrect column_names input")

    with pytest.raises(TypeError) as err:
        de.NumpySlicesDataset([1, 2, 3], column_names=[1], shuffle=False)
    assert "Argument column_names[0] with value 1 is not of type [<class 'str'>]" in str(err.value)
Exemple #4
0
def test_numpyslices_sampler_chain2():
    """
    Test NumpySlicesDataset sampler chain
    """
    logger.info("test_numpyslices_sampler_chain2")

    # Build the parent/child sampler chain using two separate statements.
    source = [1, 2, 3, 4]
    parent = ds.SequentialSampler(start_index=1, num_samples=1)
    parent.add_child(ds.SequentialSampler(start_index=1, num_samples=2))
    dataset = ds.NumpySlicesDataset(source, sampler=parent)

    # The parent sampler (num_samples=1) limits the pipeline to one row.
    size = dataset.get_dataset_size()
    logger.info(f"dataset size is: {size}")
    assert size == 1

    # Iterating yields exactly that one row.
    assert sum(1 for _ in dataset) == 1

    # Log the actual row contents for debugging.
    rows = []
    for row in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
        logger.info(f"item: {row}")
        rows.append(row)
    logger.info(f"dataset: {rows}")
Exemple #5
0
def test_callbacks_sink_simulation():
    """Simulate sink-mode training and check ds/ms callback events interleave."""
    logger.info("test_callback_sink_simulation")

    events = []
    epochs = 2
    my_cb = MyWaitedCallback(events, 1)
    data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=my_cb)
    data = data.to_device()
    data.send(num_epochs=epochs)

    # Drive the callback by hand, mimicking what the training loop would do
    # in sink mode: 4 steps per epoch, then an epoch-end notification.
    step = 0
    for epoch in range(1, epochs + 1):
        for _ in range(4):
            step += 1
            time.sleep(0.5)
            events.append(f"ms_step_end_{epoch}_{step}")
            my_cb.step_end(run_context=0)
        events.append(f"ms_epoch_end_{epoch}_{step}")
        my_cb.epoch_end(run_context=0)

    expected_synced_events = [
        'ms_step_end_1_1', 'ds_step_begin_1_2', 'ms_step_end_1_2',
        'ds_step_begin_1_3', 'ms_step_end_1_3', 'ds_step_begin_1_4',
        'ms_step_end_1_4', 'ms_epoch_end_1_4', 'ds_epoch_begin_2_4',
        'ds_step_begin_2_5', 'ms_step_end_2_5', 'ds_step_begin_2_6',
        'ms_step_end_2_6', 'ds_step_begin_2_7', 'ms_step_end_2_7',
        'ds_step_begin_2_8', 'ms_step_end_2_8', 'ms_epoch_end_2_8'
    ]

    assert events == expected_synced_events
Exemple #6
0
def test_callbacks_non_sink_mismatch_size():
    """Train with a mismatched callback step size and expect a sync timeout.

    The waited callback (step size 2) never receives the events it waits
    for, so ds_step_begin must time out once the shortened 1-second
    callback timeout elapses.
    """
    logger.info("test_callbacks_non_sink_mismatch_size")
    default_timeout = ds.config.get_callback_timeout()
    ds.config.set_callback_timeout(1)
    # Fix: restore the global timeout even if an assertion below fails,
    # so a failure here cannot poison unrelated tests.
    try:
        events = []
        my_cb1 = MyWaitedCallback(events, 2)
        my_cb2 = MyMSCallback(events)
        arr = [1, 2, 3, 4]
        data = ds.NumpySlicesDataset((arr, arr),
                                     column_names=["c1", "c2"],
                                     shuffle=False)
        data = data.map(operations=(lambda x: x), callbacks=my_cb1)
        data = data.batch(3)
        net = Net()
        model = Model(net)
        with pytest.raises(Exception) as err:
            model.train(2,
                        data,
                        dataset_sink_mode=False,
                        callbacks=[my_cb2, my_cb1])
        assert "RuntimeError: ds_step_begin timed out after 1 second(s)" in str(
            err.value)
    finally:
        ds.config.set_callback_timeout(default_timeout)
Exemple #7
0
def test_numpy_slices_invalid_empty_column_names():
    """An empty column_names list must raise a ValueError."""
    logger.info("Test incorrect column_names input")

    with pytest.raises(ValueError) as err:
        de.NumpySlicesDataset([1, 2, 3], shuffle=False, column_names=[])
    assert "column_names should not be empty" in str(err.value)
Exemple #8
0
def test_numpyslices_sampler_chain_batch():
    """
    Test NumpySlicesDataset sampler chaining, with batch
    """
    logger.info("test_numpyslices_sampler_chain_batch")

    # Create NumpySlicesDataset with sampler chain
    np_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    sampler = ds.SequentialSampler(start_index=1, num_samples=3)
    # NOTE(review): the return value of add_child is assigned back to
    # `sampler`, unlike test_numpyslices_sampler_chain2 which calls add_child
    # purely for its side effect. If add_child returns None, the dataset below
    # is built with sampler=None and the chain is silently dropped — which
    # would explain the expected size of 4 (all 10 rows batched by 3).
    # Confirm against the Sampler.add_child API before relying on this test.
    sampler = sampler.add_child(
        ds.SequentialSampler(start_index=1, num_samples=2))
    data1 = ds.NumpySlicesDataset(np_data, sampler=sampler)
    data1 = data1.batch(batch_size=3, drop_remainder=False)

    # Verify dataset size: 10 rows / batch_size 3 -> 4 batches (remainder kept)
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 4

    # Verify number of rows seen by iteration matches the reported size
    assert sum([1 for _ in data1]) == 4

    # Verify dataset contents (logged only; no value assertions here)
    res = []
    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        logger.info("item: {}".format(item))
        res.append(item)
    logger.info("dataset: {}".format(res))
Exemple #9
0
def test_callbacks_non_sink_batch_size2():
    """Non-sink training with batch size 2: ds/ms events must interleave as listed."""
    logger.info("test_callbacks_non_sink_batch_size2")

    events = []
    ds_cb = MyWaitedCallback(events, 2)
    ms_cb = MyMSCallback(events)
    column = [1, 2, 3, 4]
    data = ds.NumpySlicesDataset((column, column),
                                 column_names=["c1", "c2"],
                                 shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=ds_cb)
    data = data.batch(2)
    model = Model(Net())

    model.train(2, data, dataset_sink_mode=False, callbacks=[ms_cb, ds_cb])

    # With 4 rows batched by 2, each epoch has 2 model steps but 4 dataset
    # rows, producing this exact interleaving.
    expected_synced_events = [
        'ms_step_end_1_1', 'ds_step_begin_1_3', 'ms_step_end_1_2',
        'ms_epoch_end_1_2', 'ds_epoch_begin_2_4', 'ds_step_begin_2_5',
        'ms_step_end_2_3', 'ds_step_begin_2_7', 'ms_step_end_2_4',
        'ms_epoch_end_2_4'
    ]

    assert events == expected_synced_events
Exemple #10
0
def compare(in1, in2, length, out1, out2):
    """Run TruncateSequencePair(length) on one (s1, s2) row and check both outputs."""
    dataset = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]})
    dataset = dataset.map(input_columns=["s1", "s2"],
                          operations=text.TruncateSequencePair(length))
    for row in dataset.create_dict_iterator():
        np.testing.assert_array_equal(out1, row["s1"])
        np.testing.assert_array_equal(out2, row["s2"])
Exemple #11
0
def test_numpy_slices_distributed_zero_shard():
    """num_shards=0 is outside the valid range and must raise a ValueError."""
    logger.info("Test Slicing a 1D list.")

    with pytest.raises(ValueError) as err:
        de.NumpySlicesDataset([1, 2, 3], num_shards=0, shard_id=0, shuffle=False)
    assert "Input num_shards is not within the required interval of [1, 2147483647]." in str(err.value)
Exemple #12
0
def test_clear_callback():
    """Getter passes (size / output_shapes) must not fire map callbacks; iteration must."""
    logger.info("test_clear_callback")

    class ProbeCallback(DSCallback):
        """Records whether any hook ran and how many steps were observed."""

        def __init__(self):
            super().__init__(step_size=1)
            self.flag = False
            self.row_cnt = 0

        def ds_begin(self, ds_run_context):
            # Would only run if the getter pass failed to strip callbacks.
            self.flag = True

        def ds_step_begin(self, ds_run_context):
            self.row_cnt += 1

    probe = ProbeCallback()
    # Sanity-check the probe's initial state before attaching it.
    assert not probe.flag and probe.row_cnt == 0

    data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=probe)

    # Metadata getters run a stripped pipeline: no callback activity expected.
    assert data.get_dataset_size() == 4
    assert data.output_shapes() == [[]]
    assert not probe.flag and probe.row_cnt == 0

    # A real iteration does fire the callback, once per row.
    for _ in data.create_dict_iterator(num_epochs=1):
        pass
    assert probe.flag and probe.row_cnt == 4
Exemple #13
0
def test_compose_with_custom_function():
    """
    Test Python Compose with custom function
    """
    def fan_out(x):
        # One-column input becomes a two-column output: (x, x squared).
        return (x, x * x)

    data = ds.NumpySlicesDataset([[1, 2]],
                                 column_names=["col0"],
                                 shuffle=False)
    # Triple the values, fan out to two columns, then stack the columns back
    # into a single output column.
    data = data.map(input_columns=["col0"],
                    operations=[lambda x: x * 3,
                                fan_out,
                                lambda *images: np.stack(images)])

    res = [row["col0"].tolist()
           for row in data.create_dict_iterator(output_numpy=True)]
    assert res == [[[3, 6], [9, 36]]]
Exemple #14
0
def test_callbacks_one_cb():
    """Attach single-hook callbacks to separate map ops; verify each event list."""
    logger.info("test_callbacks_one_cb")

    data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
    events1, events2, events3 = [], [], []
    my_begin = Begin(events=events1, cb_id=1)
    my_epoch_begin = EpochBegin(events=events2, cb_id=2)
    my_epoch_end = EpochEnd(events=events3, cb_id=3)
    my_step_begin = StepBegin(events=events3, cb_id=3)
    my_step_end = StepEnd(events=events2, cb_id=2)

    data = data.map(operations=(lambda x: x), callbacks=my_begin)
    data = data.map(operations=(lambda x: x), callbacks=[my_epoch_begin, my_step_end])
    data = data.map(operations=(lambda x: x), callbacks=[my_epoch_end, my_step_begin])

    itr = data.create_tuple_iterator(num_epochs=2)
    for _ in range(2):
        for _ in itr:
            pass

    # Build the expected event sequences for 2 epochs of 4 steps each.
    # Event name format: <hook>_<epoch>_<step-in-epoch>_<global-step>.
    expected_events1 = [('begin_0_0_0', [1])]
    expected_events2 = []
    expected_events3 = []
    for epoch in (1, 2):
        base = (epoch - 1) * 4
        expected_events2.append((f'epoch_begin_{epoch}_0_{base}', [2]))
        for step in range(1, 5):
            expected_events2.append((f'step_end_{epoch}_{step}_{base + step}', [2]))
            expected_events3.append((f'step_begin_{epoch}_{step}_{base + step}', [3]))
        expected_events3.append((f'epoch_end_{epoch}_4_{base + 4}', [3]))

    assert events1 == expected_events1
    assert events2 == expected_events2
    assert events3 == expected_events3
Exemple #15
0
def test_numpy_slices_invalid_empty_data_column():
    """Constructing from an empty data list must raise a ValueError."""
    logger.info("Test incorrect column_names input")

    with pytest.raises(ValueError) as err:
        de.NumpySlicesDataset([], shuffle=False)
    assert "Argument data cannot be empty" in str(err.value)
Exemple #16
0
def build_test_case_2maps(epochs, steps):
    """Run a pipeline with a callback on each of two maps; verify both fire per event."""
    events = []
    cb_first = MyDSCallback(events=events, cb_id=0)
    cb_second = MyDSCallback(events=events, cb_id=1)

    data = ds.NumpySlicesDataset(list(range(1, steps + 1)), shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=cb_first)
    data = data.map(operations=(lambda x: x), callbacks=cb_second)

    itr = data.create_tuple_iterator(num_epochs=epochs)
    for _ in range(epochs):
        for _ in itr:
            pass

    # Compare everything except the initial event (index 0 is excluded on
    # both sides, mirroring the per-event check below which also exempts
    # the "begin" event).
    expected_events = generate_expected(epochs, steps, map_num=2)
    assert expected_events[1:] == events[1:]

    for event in events:
        assert len(event) == 2
        name, cb_ids = event
        if name != "begin_0_0_0":
            # Both callbacks must have recorded the event, in map order.
            assert cb_ids[0] == 0
            assert cb_ids[1] == 1
def pad_compare(array, pad_shape, pad_value, res):
    """Pad `array` with PadEnd and assert the single output row equals `res`."""
    data = ds.NumpySlicesDataset([array])
    # Only forward pad_value when the caller supplied one, so the op's
    # default fill value is exercised otherwise.
    pad_op = ops.PadEnd(pad_shape) if pad_value is not None else ops.PadEnd(pad_shape)
    if pad_value is not None:
        pad_op = ops.PadEnd(pad_shape, pad_value)
    data = data.map(operations=pad_op)
    for row in data.create_tuple_iterator(output_numpy=True):
        np.testing.assert_array_equal(res, row[0])
Exemple #18
0
def test_numpy_slices_sequential_sampler():
    """Test NumpySlicesDataset with SequentialSampler plus repeat(2).

    Sequential sampling repeated twice must revisit the 8 rows in order,
    so row i of the repeated stream equals np_data[i % 8].
    """
    logger.info("Test numpy_slices_dataset with SequentialSampler and repeat.")

    np_data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
    # Fix: use a local name that does not shadow the `ds` module alias
    # relied upon by other tests in this file.
    dataset = de.NumpySlicesDataset(np_data, sampler=de.SequentialSampler()).repeat(2)

    for i, data in enumerate(dataset):
        assert np.equal(data[0].asnumpy(), np_data[i % 8]).all()
Exemple #19
0
 def pipeline():
     """Read the same 10-element subset two ways and return both results.

     Builds one dataset with an explicit SubsetSampler and one passing the
     raw `indices` plus `num_samples`, then returns a pair of
     (rows, dataset_size) tuples so the caller can check both construction
     styles agree. `indices` and `num_samples` come from the enclosing scope.
     """
     sampler = ds.SubsetSampler(indices, num_samples)
     data = ds.NumpySlicesDataset(list(range(0, 10)), sampler=sampler)
     data2 = ds.NumpySlicesDataset(list(range(0, 10)),
                                   sampler=indices,
                                   num_samples=num_samples)
     dataset_size = data.get_dataset_size()
     # Fix: the second size must be queried from data2, not data.
     dataset_size2 = data2.get_dataset_size()
     res1 = [
         d[0] for d in data.create_tuple_iterator(num_epochs=1,
                                                  output_numpy=True)
     ], dataset_size
     res2 = [
         d[0] for d in data2.create_tuple_iterator(num_epochs=1,
                                                   output_numpy=True)
     ], dataset_size2
     return res1, res2
Exemple #20
0
def test_numpy_slices_list_1():
    """Slice a flat 1D Python list; each row must equal the source element."""
    logger.info("Test Slicing a 1D list.")

    np_data = [1, 2, 3]
    # Fix: avoid shadowing the `ds` module alias used by other tests.
    dataset = de.NumpySlicesDataset(np_data, shuffle=False)

    for i, data in enumerate(dataset):
        assert data[0].asnumpy() == np_data[i]
Exemple #21
0
def compare(array):
    """Duplicate column "x" into "y" and check both columns equal the source."""
    data = ds.NumpySlicesDataset([array], column_names="x")
    expected = np.array(array)
    data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
                    column_order=["x", "y"])
    for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        np.testing.assert_array_equal(expected, row["x"])
        np.testing.assert_array_equal(expected, row["y"])
Exemple #22
0
def slice_compare(array, indexing, expected_array):
    """Apply ops.Slice with `indexing` to a one-row dataset and verify the output."""
    data = ds.NumpySlicesDataset([array])
    # A non-empty list of non-int index specs is splatted into Slice as
    # separate per-dimension arguments; anything else is passed as one spec.
    multi_dim = isinstance(indexing, list) and indexing and not isinstance(indexing[0], int)
    slice_op = ops.Slice(*indexing) if multi_dim else ops.Slice(indexing)
    data = data.map(operations=slice_op)
    for row in data.create_dict_iterator(output_numpy=True):
        np.testing.assert_array_equal(expected_array, row['column_0'])
Exemple #23
0
def test_numpy_slices_list_3():
    """Slice a nested list along the first dimension; rows keep their 2D shape."""
    logger.info("Test Slicing list in the first dimension.")

    np_data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
    # Fix: avoid shadowing the `ds` module alias used by other tests.
    dataset = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)

    for i, data in enumerate(dataset):
        assert np.equal(data[0].asnumpy(), np_data[i]).all()
Exemple #24
0
def test_numpy_slices_list_2():
    """Slice a 2D list; each row must equal the corresponding inner list."""
    logger.info("Test Slicing a 2D list into 1D list.")

    np_data = [[1, 2], [3, 4]]
    # Fix: avoid shadowing the `ds` module alias used by other tests.
    dataset = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)

    for i, data in enumerate(dataset):
        assert np.equal(data[0].asnumpy(), np_data[i]).all()
Exemple #25
0
 def test_config(arr, op_list_args=None, *, _unused=None, **_kw):
     pass
Exemple #26
0
def create_dataset_diy():
    """Build a one-sample dataset of random 3D image/segmentation volumes."""
    images = np.random.rand(1, 1, 224, 224, 96).astype(np.float32)
    segs = np.random.rand(1, 4, 224, 224, 96).astype(np.float32)

    # Single sample per column: one image volume paired with one seg volume.
    return ds.NumpySlicesDataset({"images": [images], "segs": [segs]})
Exemple #27
0
def compare(array, res, idx, cnt):
    """Batch then apply ops.Unique; check the values, index-map, and count columns."""
    data = ds.NumpySlicesDataset([array], column_names="x")
    data = data.batch(2)
    data = data.map(operations=ops.Unique(), input_columns=["x"], output_columns=["x", "y", "z"],
                    column_order=["x", "y", "z"])
    for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        np.testing.assert_array_equal(res, row["x"])
        np.testing.assert_array_equal(idx, row["y"])
        np.testing.assert_array_equal(cnt, row["z"])
def test_sliding_window_exception():
    """Verify SlidingWindow rejects invalid constructor args and invalid inputs.

    Rewritten with pytest.raises for consistency with the other tests in
    this file (the original used try/assert-False/except blocks).
    """
    # width must be a positive integer.
    with pytest.raises(ValueError):
        text.SlidingWindow(0, 0)

    # width must be an int, not a string.
    with pytest.raises(TypeError):
        text.SlidingWindow("1", 0)

    # axis must be an int, not a string.
    with pytest.raises(TypeError):
        text.SlidingWindow(1, "0")

    # Only axis 0 or -1 is supported; the error surfaces at iteration time.
    inputs = [[1, 2, 3, 4, 5]]
    dataset = ds.NumpySlicesDataset(inputs,
                                    column_names=["text"],
                                    shuffle=False)
    dataset = dataset.map(operations=text.SlidingWindow(3, -100),
                          input_columns=["text"])
    with pytest.raises(RuntimeError) as err:
        for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
    assert "axis supports 0 or -1 only for now." in str(err.value)

    # Only 1D tensors are supported. (The "SlidingWindosOp" typo is the
    # backend op's actual message text — do not "fix" it here.)
    inputs = ["aa", "bb", "cc"]
    dataset = ds.NumpySlicesDataset(inputs,
                                    column_names=["text"],
                                    shuffle=False)
    dataset = dataset.map(operations=text.SlidingWindow(2, 0),
                          input_columns=["text"])
    with pytest.raises(RuntimeError) as err:
        for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
    assert "SlidingWindosOp supports 1D Tensors only for now." in str(err.value)
Exemple #29
0
 def test_config(arr, op_list, prob=0.5):
     """Apply RandomApply(op_list, prob) to arr; return the rows, or the error text.

     Validation failures (TypeError/ValueError) are captured and returned as
     a string so callers can assert on the message.
     """
     try:
         dataset = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
         dataset = dataset.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
         return [row["col"].tolist()
                 for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True)]
     except (TypeError, ValueError) as e:
         return str(e)
Exemple #30
0
def test_numpy_slices_tuple_1():
    """Slice a list of tuples; each tuple becomes one multi-column row."""
    logger.info("Test slicing a list of tuple.")

    np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])]
    # Fix: avoid shadowing the `ds` module alias used by other tests.
    dataset = de.NumpySlicesDataset(np_data, shuffle=False)

    for i, data in enumerate(dataset.create_tuple_iterator(output_numpy=True)):
        assert np.equal(data, np_data[i]).all()

    # Three tuples in, three rows out.
    assert sum([1 for _ in dataset]) == 3