def test_basics_input_indexes():
    """Verify input_indexs defaults to (), is settable, and survives transformations."""
    logger.info("test_basics_input_indexes")

    first = ds.NumpySlicesDataset([1, 2, 3], column_names=["col_1"])
    assert first.input_indexs == ()
    first.input_indexs = 10
    assert first.input_indexs == 10
    # The attribute must persist through shuffle and project.
    first = first.shuffle(2)
    assert first.input_indexs == 10
    first = first.project(["col_1"])
    assert first.input_indexs == 10

    second = ds.NumpySlicesDataset([1, 2, 3], column_names=["col_1"])
    assert second.input_indexs == ()
    second = second.shuffle(2)
    assert second.input_indexs == ()
    second = second.project(["col_1"])
    assert second.input_indexs == ()
    second.input_indexs = 20
    assert second.input_indexs == 20

    # Concatenation keeps the first operand's input_indexs value.
    combined = first + second
    assert combined.input_indexs == 10
def test_callbacks_validations():
    """Invalid callbacks arguments to map() must be rejected with clear errors."""
    logger.info("test_callbacks_validations")

    # A plain scalar is not a valid callback.
    with pytest.raises(Exception) as err:
        data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
        data.map(operations=(lambda x: x), callbacks=0)
    assert "Argument callbacks with value 0 is not " in str(err.value)

    # A list mixing a valid callback with a scalar is rejected, pointing at index 1.
    with pytest.raises(Exception) as err:
        my_cb1 = MyDSCallback()
        data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
        data.map(operations=(lambda x: x), callbacks=[my_cb1, 0])
    assert "Argument callbacks[1] with value 0 is not " in str(err.value)

    # A DSCallback subclass that overrides nothing fails once the pipeline executes.
    with pytest.raises(Exception) as err:
        class BadCB(DSCallback):
            pass

        my_cb = BadCB()
        data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
        data = data.map(operations=(lambda x: x), callbacks=my_cb)
        for _ in data:
            pass
    assert "Provided Callback class did not override any of the 6 callback methods." in str(err.value)
def test_numpy_slices_invalid_column_names_type():
    """Non-string entries in column_names must raise TypeError."""
    logger.info("Test incorrect column_names input")
    raw = [1, 2, 3]
    with pytest.raises(TypeError) as err:
        de.NumpySlicesDataset(raw, column_names=[1], shuffle=False)
    assert "Argument column_names[0] with value 1 is not of type [<class 'str'>]" in str(err.value)
def test_numpyslices_sampler_chain2():
    """Test NumpySlicesDataset with a sampler chain attached via add_child."""
    logger.info("test_numpyslices_sampler_chain2")

    # Build the chain in two statements: create the parent, then attach the child.
    source = [1, 2, 3, 4]
    parent = ds.SequentialSampler(start_index=1, num_samples=1)
    parent.add_child(ds.SequentialSampler(start_index=1, num_samples=2))
    data1 = ds.NumpySlicesDataset(source, sampler=parent)

    # The chained sampler yields exactly one row.
    size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(size))
    assert size == 1
    assert sum(1 for _ in data1) == 1

    # Log the actual row contents for debugging.
    rows = []
    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        logger.info("item: {}".format(item))
        rows.append(item)
    logger.info("dataset: {}".format(rows))
def test_callbacks_sink_simulation():
    """
    Simulate sink-mode training: manually fire ms_step_end/ms_epoch_end on the
    waited callback and verify the recorded events interleave in the expected
    synchronized order with the dataset-side callback events.
    """
    logger.info("test_callback_sink_simulation")

    events = []
    epochs = 2
    my_cb = MyWaitedCallback(events, 1)
    data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=my_cb)
    data = data.to_device()
    data.send(num_epochs=epochs)
    for e in range(epochs):
        for s in range(4):
            # Small delay so the dataset-side callback events land before ours.
            time.sleep(0.5)
            # Record the "model side" step end, then unblock the waiting callback.
            events.append(f"ms_step_end_{e + 1}_{e * 4 + s + 1}")
            my_cb.step_end(run_context=0)
        events.append(f"ms_epoch_end_{e + 1}_{(e + 1) * 4}")
        my_cb.epoch_end(run_context=0)
    expected_synced_events = ['ms_step_end_1_1', 'ds_step_begin_1_2', 'ms_step_end_1_2',
                              'ds_step_begin_1_3', 'ms_step_end_1_3',
                              'ds_step_begin_1_4', 'ms_step_end_1_4', 'ms_epoch_end_1_4',
                              'ds_epoch_begin_2_4', 'ds_step_begin_2_5',
                              'ms_step_end_2_5', 'ds_step_begin_2_6', 'ms_step_end_2_6',
                              'ds_step_begin_2_7', 'ms_step_end_2_7',
                              'ds_step_begin_2_8', 'ms_step_end_2_8', 'ms_epoch_end_2_8']

    assert events == expected_synced_events
def test_callbacks_non_sink_mismatch_size():
    """A waited callback that is never released must time out during training."""
    logger.info("test_callbacks_non_sink_mismatch_size")
    default_timeout = ds.config.get_callback_timeout()
    # Shrink the timeout so the expected failure happens quickly.
    ds.config.set_callback_timeout(1)

    events = []
    waited_cb = MyWaitedCallback(events, 2)
    ms_cb = MyMSCallback(events)
    arr = [1, 2, 3, 4]
    data = ds.NumpySlicesDataset((arr, arr), column_names=["c1", "c2"], shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=waited_cb)
    data = data.batch(3)

    model = Model(Net())
    with pytest.raises(Exception) as err:
        model.train(2, data, dataset_sink_mode=False, callbacks=[ms_cb, waited_cb])
    assert "RuntimeError: ds_step_begin timed out after 1 second(s)" in str(err.value)

    # Restore the global timeout so later tests are unaffected.
    ds.config.set_callback_timeout(default_timeout)
def test_numpy_slices_invalid_empty_column_names():
    """An empty column_names list must raise ValueError."""
    logger.info("Test incorrect column_names input")
    raw = [1, 2, 3]
    with pytest.raises(ValueError) as err:
        de.NumpySlicesDataset(raw, column_names=[], shuffle=False)
    assert "column_names should not be empty" in str(err.value)
def test_numpyslices_sampler_chain_batch():
    """
    Test NumpySlicesDataset sampler chaining, with batch
    """
    logger.info("test_numpyslices_sampler_chain_batch")

    # Create NumpySlicesDataset with sampler chain
    np_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    sampler = ds.SequentialSampler(start_index=1, num_samples=3)
    # NOTE(review): unlike test_numpyslices_sampler_chain2, which calls add_child()
    # as a statement, this rebinds `sampler` to add_child()'s return value. If
    # add_child returns None, `sampler` becomes None and the chain is silently
    # dropped — consistent with the asserts below, which match all 10 rows passing
    # through (ceil(10/3) == 4 batches). Confirm add_child's return value against
    # the samplers API before relying on this test.
    sampler = sampler.add_child(
        ds.SequentialSampler(start_index=1, num_samples=2))
    data1 = ds.NumpySlicesDataset(np_data, sampler=sampler)
    data1 = data1.batch(batch_size=3, drop_remainder=False)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 4

    # Verify number of rows
    assert sum([1 for _ in data1]) == 4

    # Verify dataset contents
    res = []
    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        logger.info("item: {}".format(item))
        res.append(item)
    logger.info("dataset: {}".format(res))
def test_callbacks_non_sink_batch_size2():
    """Run non-sink training with batch size 2 and check the synced event order."""
    logger.info("test_callbacks_non_sink_batch_size2")
    events = []
    waited_cb = MyWaitedCallback(events, 2)
    ms_cb = MyMSCallback(events)

    arr = [1, 2, 3, 4]
    data = ds.NumpySlicesDataset((arr, arr), column_names=["c1", "c2"], shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=waited_cb)
    data = data.batch(2)

    model = Model(Net())
    model.train(2, data, dataset_sink_mode=False, callbacks=[ms_cb, waited_cb])

    expected_synced_events = ['ms_step_end_1_1', 'ds_step_begin_1_3', 'ms_step_end_1_2',
                              'ms_epoch_end_1_2', 'ds_epoch_begin_2_4', 'ds_step_begin_2_5',
                              'ms_step_end_2_3', 'ds_step_begin_2_7', 'ms_step_end_2_4',
                              'ms_epoch_end_2_4']
    assert events == expected_synced_events
def compare(in1, in2, length, out1, out2):
    """Truncate the sequence pair (in1, in2) to `length` and check against (out1, out2)."""
    pair_data = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]})
    pair_data = pair_data.map(input_columns=["s1", "s2"],
                              operations=text.TruncateSequencePair(length))
    for row in pair_data.create_dict_iterator():
        np.testing.assert_array_equal(out1, row["s1"])
        np.testing.assert_array_equal(out2, row["s2"])
def test_numpy_slices_distributed_zero_shard():
    """num_shards=0 is outside the valid range and must raise ValueError."""
    logger.info("Test Slicing a 1D list.")
    raw = [1, 2, 3]
    with pytest.raises(ValueError) as err:
        de.NumpySlicesDataset(raw, num_shards=0, shard_id=0, shuffle=False)
    assert "Input num_shards is not within the required interval of [1, 2147483647]." in str(err.value)
def test_clear_callback():
    """Getter passes (size/shapes) must not fire callbacks; real iteration must."""
    logger.info("test_clear_callback")

    class FlagCallback(DSCallback):
        """Records whether the pipeline actually ran and how many steps it saw."""

        def __init__(self):
            super().__init__(step_size=1)
            self.flag = False
            self.row_cnt = 0

        def ds_begin(self, ds_run_context):
            # Only a real pipeline run — not a getter pass — reaches here.
            self.flag = True

        def ds_step_begin(self, ds_run_context):
            self.row_cnt += 1

    cb = FlagCallback()
    # Sanity-check the initial state before exercising anything.
    assert not cb.flag and cb.row_cnt == 0

    data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=cb)

    # Getter operations should execute with callbacks stripped out.
    assert data.get_dataset_size() == 4
    assert data.output_shapes() == [[]]
    assert not cb.flag and cb.row_cnt == 0

    # A genuine iteration must invoke the callbacks.
    for _ in data.create_dict_iterator(num_epochs=1):
        pass
    assert cb.flag and cb.row_cnt == 4
def test_compose_with_custom_function():
    """
    Test Python Compose with custom function
    """

    def custom_function(x):
        return (x, x * x)

    # The op list fans one column out to two, then stacks them back into one.
    op_list = [
        lambda x: x * 3,
        custom_function,
        lambda *images: np.stack(images)
    ]

    data = ds.NumpySlicesDataset([[1, 2]], column_names=["col0"], shuffle=False)
    data = data.map(input_columns=["col0"], operations=op_list)

    res = [row["col0"].tolist()
           for row in data.create_dict_iterator(output_numpy=True)]
    assert res == [[[3, 6], [9, 36]]]
def test_callbacks_one_cb():
    """
    Attach single-event callbacks across three map ops and verify each fires
    with the expected (epoch, step, global-step) coordinates and cb_id.
    """
    logger.info("test_callbacks_one_cb")

    data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
    # Three separate event sinks so each callback group can be checked in isolation.
    events1 = []
    events2 = []
    events3 = []
    my_begin = Begin(events=events1, cb_id=1)
    my_epoch_begin = EpochBegin(events=events2, cb_id=2)
    my_epoch_end = EpochEnd(events=events3, cb_id=3)
    my_step_begin = StepBegin(events=events3, cb_id=3)
    my_step_end = StepEnd(events=events2, cb_id=2)

    # Spread the callbacks over three consecutive map operations.
    data = data.map(operations=(lambda x: x), callbacks=my_begin)
    data = data.map(operations=(lambda x: x), callbacks=[my_epoch_begin, my_step_end])
    data = data.map(operations=(lambda x: x), callbacks=[my_epoch_end, my_step_begin])

    itr = data.create_tuple_iterator(num_epochs=2)
    for _ in range(2):
        for _ in itr:
            pass
    # begin fires once, before any epoch/step counters advance.
    expected_events1 = [('begin_0_0_0', [1])]
    expected_events2 = [('epoch_begin_1_0_0', [2]), ('step_end_1_1_1', [2]),
                        ('step_end_1_2_2', [2]), ('step_end_1_3_3', [2]),
                        ('step_end_1_4_4', [2]), ('epoch_begin_2_0_4', [2]),
                        ('step_end_2_1_5', [2]), ('step_end_2_2_6', [2]),
                        ('step_end_2_3_7', [2]), ('step_end_2_4_8', [2])]
    expected_events3 = [('step_begin_1_1_1', [3]), ('step_begin_1_2_2', [3]),
                        ('step_begin_1_3_3', [3]), ('step_begin_1_4_4', [3]),
                        ('epoch_end_1_4_4', [3]), ('step_begin_2_1_5', [3]),
                        ('step_begin_2_2_6', [3]), ('step_begin_2_3_7', [3]),
                        ('step_begin_2_4_8', [3]), ('epoch_end_2_4_8', [3])]
    assert events1 == expected_events1
    assert events2 == expected_events2
    assert events3 == expected_events3
def test_numpy_slices_invalid_empty_data_column():
    """Constructing a dataset from an empty list must raise ValueError."""
    logger.info("Test incorrect column_names input")
    raw = []
    with pytest.raises(ValueError) as err:
        de.NumpySlicesDataset(raw, shuffle=False)
    assert "Argument data cannot be empty" in str(err.value)
def build_test_case_2maps(epochs, steps):
    """Run a two-map pipeline with one callback per map and validate the events."""
    events = []
    cb_first = MyDSCallback(events=events, cb_id=0)
    cb_second = MyDSCallback(events=events, cb_id=1)

    data = ds.NumpySlicesDataset(list(range(1, steps + 1)), shuffle=False)
    data = data.map(operations=(lambda x: x), callbacks=cb_first)
    data = data.map(operations=(lambda x: x), callbacks=cb_second)

    itr = data.create_tuple_iterator(num_epochs=epochs)
    for _ in range(epochs):
        for _ in itr:
            pass

    # The first entry is excluded from the comparison.
    expected_events = generate_expected(epochs, steps, map_num=2)
    assert expected_events[1:] == events[1:]

    # Every event carries (name, cb_ids); non-begin events list both maps in order.
    for event in events:
        assert len(event) == 2
        name, cb_ids = event
        if name != "begin_0_0_0":
            assert cb_ids[0] == 0
            assert cb_ids[1] == 1
def pad_compare(array, pad_shape, pad_value, res):
    """Pad `array` with PadEnd (optional fill value) and compare the result to `res`."""
    data = ds.NumpySlicesDataset([array])
    # PadEnd takes the fill value only when one was supplied.
    pad_op = ops.PadEnd(pad_shape) if pad_value is None else ops.PadEnd(pad_shape, pad_value)
    data = data.map(operations=pad_op)
    for row in data.create_tuple_iterator(output_numpy=True):
        np.testing.assert_array_equal(res, row[0])
def test_numpy_slices_sequential_sampler():
    """SequentialSampler plus repeat(2) must replay the 8 rows in order, twice."""
    logger.info("Test numpy_slices_dataset with SequentialSampler and repeat.")
    np_data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
    # Fix: name the local `dataset` instead of `ds`, which shadowed the `ds`
    # module alias used by the other tests in this file.
    dataset = de.NumpySlicesDataset(np_data, sampler=de.SequentialSampler()).repeat(2)
    for i, data in enumerate(dataset):
        # The repeated epoch wraps around the source data, hence the modulo.
        assert np.equal(data[0].asnumpy(), np_data[i % 8]).all()
def pipeline():
    """
    Build two equivalent datasets — one via an explicit SubsetSampler, one via a
    raw indices list — and return ((rows, size), (rows2, size2)) for comparison.

    Relies on `indices` and `num_samples` from the enclosing scope.
    """
    sampler = ds.SubsetSampler(indices, num_samples)
    data = ds.NumpySlicesDataset(list(range(0, 10)), sampler=sampler)
    data2 = ds.NumpySlicesDataset(list(range(0, 10)), sampler=indices, num_samples=num_samples)
    dataset_size = data.get_dataset_size()
    # Bug fix: the second size must be read from data2, not data (copy-paste error
    # in the original meant data2's size was never actually checked).
    dataset_size2 = data2.get_dataset_size()
    res1 = [
        d[0] for d in data.create_tuple_iterator(num_epochs=1, output_numpy=True)
    ], dataset_size
    res2 = [
        d[0] for d in data2.create_tuple_iterator(num_epochs=1, output_numpy=True)
    ], dataset_size2
    return res1, res2
def test_numpy_slices_list_1():
    """A 1D list must slice into one scalar row per element, in order."""
    logger.info("Test Slicing a 1D list.")
    np_data = [1, 2, 3]
    # Fix: name the local `dataset` instead of `ds`, which shadowed the `ds`
    # module alias used by the other tests in this file.
    dataset = de.NumpySlicesDataset(np_data, shuffle=False)
    for i, data in enumerate(dataset):
        assert data[0].asnumpy() == np_data[i]
def compare(array):
    """Duplicate column "x" into "y" and verify both columns match the input."""
    data = ds.NumpySlicesDataset([array], column_names="x")
    reference = np.array(array)
    data = data.map(operations=ops.Duplicate(), input_columns=["x"],
                    output_columns=["x", "y"], column_order=["x", "y"])
    for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        np.testing.assert_array_equal(reference, row["x"])
        np.testing.assert_array_equal(reference, row["y"])
def slice_compare(array, indexing, expected_array):
    """Apply ops.Slice with `indexing` and compare the output to `expected_array`."""
    data = ds.NumpySlicesDataset([array])
    # A non-empty list whose first entry is not an int is unpacked into
    # multiple Slice arguments; anything else is passed through as one.
    spread_args = (isinstance(indexing, list)
                   and bool(indexing)
                   and not isinstance(indexing[0], int))
    if spread_args:
        data = data.map(operations=ops.Slice(*indexing))
    else:
        data = data.map(operations=ops.Slice(indexing))
    for row in data.create_dict_iterator(output_numpy=True):
        np.testing.assert_array_equal(expected_array, row['column_0'])
def test_numpy_slices_list_3():
    """A 3D list must slice along the first dimension into 2D rows."""
    logger.info("Test Slicing list in the first dimension.")
    np_data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
    # Fix: name the local `dataset` instead of `ds`, which shadowed the `ds`
    # module alias used by the other tests in this file.
    dataset = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)
    for i, data in enumerate(dataset):
        assert np.equal(data[0].asnumpy(), np_data[i]).all()
def test_numpy_slices_list_2():
    """A 2D list must slice into one 1D row per inner list."""
    logger.info("Test Slicing a 2D list into 1D list.")
    np_data = [[1, 2], [3, 4]]
    # Fix: name the local `dataset` instead of `ds`, which shadowed the `ds`
    # module alias used by the other tests in this file.
    dataset = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)
    for i, data in enumerate(dataset):
        assert np.equal(data[0].asnumpy(), np_data[i]).all()
def test_config(arr, input_columns, output_cols, op_list):
    """Run `op_list` over `arr` and collect every output column, row by row."""
    data = ds.NumpySlicesDataset(arr, column_names=input_columns, shuffle=False)
    data = data.map(operations=op_list, input_columns=input_columns,
                    output_columns=output_cols, column_order=output_cols)
    # For each row, append each requested column in the given order.
    return [row[col].tolist()
            for row in data.create_dict_iterator(output_numpy=True)
            for col in output_cols]
def create_dataset_diy():
    """Build a one-sample dataset of random float32 image/segmentation volumes."""
    image = np.random.rand(1, 1, 224, 224, 96).astype(np.float32)
    seg = np.random.rand(1, 4, 224, 224, 96).astype(np.float32)
    # Single sample per column; wrap the arrays in longer lists to scale up.
    return ds.NumpySlicesDataset({"images": [image], "segs": [seg]})
def compare(array, res, idx, cnt):
    """Batch column "x", run ops.Unique, and check values/indices/counts."""
    data = ds.NumpySlicesDataset([array], column_names="x")
    data = data.batch(2)
    data = data.map(operations=ops.Unique(), input_columns=["x"],
                    output_columns=["x", "y", "z"], column_order=["x", "y", "z"])
    for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        np.testing.assert_array_equal(res, row["x"])
        np.testing.assert_array_equal(idx, row["y"])
        np.testing.assert_array_equal(cnt, row["z"])
def test_sliding_window_exception():
    """Exercise SlidingWindow's constructor and runtime validation failures."""
    # SlidingWindow(0, 0) is rejected at construction with ValueError.
    try:
        _ = text.SlidingWindow(0, 0)
        assert False
    except ValueError:
        pass

    # A string first argument is rejected with TypeError.
    try:
        _ = text.SlidingWindow("1", 0)
        assert False
    except TypeError:
        pass

    # A string second argument is rejected with TypeError.
    try:
        _ = text.SlidingWindow(1, "0")
        assert False
    except TypeError:
        pass

    # An out-of-range axis is only rejected at pipeline execution time.
    try:
        inputs = [[1, 2, 3, 4, 5]]
        dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
        dataset = dataset.map(operations=text.SlidingWindow(3, -100), input_columns=["text"])
        for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "axis supports 0 or -1 only for now." in str(e)

    # This string input triggers the 1D-only runtime error.
    try:
        inputs = ["aa", "bb", "cc"]
        dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False)
        dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"])
        for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        # "SlidingWindos" matches the actual runtime message — do not "fix" it here.
        assert "SlidingWindosOp supports 1D Tensors only for now." in str(e)
def test_config(arr, op_list, prob=0.5):
    """Apply RandomApply(op_list, prob) to `arr`; return rows, or the error text."""
    try:
        data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
        data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
        return [row["col"].tolist()
                for row in data.create_dict_iterator(num_epochs=1, output_numpy=True)]
    except (TypeError, ValueError) as e:
        # Validation failures are returned as strings so callers can assert on them.
        return str(e)
def test_numpy_slices_tuple_1():
    """A list of tuples must yield one multi-column row per tuple."""
    logger.info("Test slicing a list of tuple.")
    np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])]
    # Fix: name the local `dataset` instead of `ds`, which shadowed the `ds`
    # module alias used by the other tests in this file.
    dataset = de.NumpySlicesDataset(np_data, shuffle=False)
    for i, data in enumerate(dataset.create_tuple_iterator(output_numpy=True)):
        assert np.equal(data, np_data[i]).all()

    assert sum([1 for _ in dataset]) == 3