def test_fetch(self):
    """Check ``fetch()`` against the full dummy data for the current mode.

    The callback handed to the dataset asserts that both ``indices`` and
    ``key_indices`` are ``None``.  The fetched value must equal the dummy
    data arranged per ``self.mode``, and every column must be an
    ``np.ndarray`` or a ``list`` according to ``self.return_array``.
    """
    def check_request(indices, key_indices):
        self.assertIsNone(indices)
        self.assertIsNone(key_indices)

    source = dummy_dataset.DummyDataset(
        mode=self.mode,
        return_array=self.return_array,
        callback=check_request)
    fetched = source.fetch()

    if self.mode is tuple:
        wanted = tuple(source.data)
    elif self.mode is dict:
        wanted = dict(zip(('a', 'b', 'c'), source.data))
    elif self.mode is None:
        wanted = source.data[0]
    np.testing.assert_equal(fetched, wanted)

    # Normalise the result to an iterable of columns before type checks.
    if self.mode is dict:
        columns = fetched.values()
    elif self.mode is None:
        columns = (fetched,)
    else:
        columns = fetched

    column_type = np.ndarray if self.return_array else list
    for column in columns:
        self.assertIsInstance(column, column_type)
def test_with_converter(self):
    """Check that ``with_converter`` yields a view using the given converter.

    The converter verifies how the fetched data is passed to it (keyword
    arguments in dict mode, positional arguments otherwise) and returns
    the sentinel ``'converted'``.  The view must keep the length, keys,
    mode and examples of the source dataset.
    """
    source = dummy_dataset.DummyDataset(mode=self.mode)

    def converter(*args, **kwargs):
        if self.mode is dict:
            self.assertEqual(args, ())
            np.testing.assert_equal(
                kwargs, dict(zip(('a', 'b', 'c'), source.data)))
        elif self.mode is tuple or self.mode is None:
            # Both non-dict modes are checked as purely positional data.
            np.testing.assert_equal(args, tuple(source.data))
            self.assertEqual(kwargs, {})
        return 'converted'

    converted_view = source.with_converter(converter)

    self.assertIsInstance(converted_view, chainer.dataset.TabularDataset)
    self.assertEqual(len(converted_view), len(source))
    self.assertEqual(converted_view.keys, source.keys)
    self.assertEqual(converted_view.mode, source.mode)

    from_view = converted_view.get_examples(None, None)
    from_source = source.get_examples(None, None)
    self.assertEqual(from_view, from_source)

    result = converted_view.convert(converted_view.fetch())
    self.assertEqual(result, 'converted')
def test_as_dict(self):
    """Check that the dict-mode view keeps length and keys of its source.

    The view is obtained through ``asdict()``, the same spelling the
    sibling ``test_asdict`` uses, so both tests exercise one method name.
    """
    dataset = dummy_dataset.DummyDataset(mode=self.mode)
    # NOTE(review): this previously called ``as_dict()``; confirm that no
    # supported version of the dataset class still relies on that name.
    view = dataset.asdict()

    self.assertIsInstance(view, chainer.dataset.TabularDataset)
    self.assertEqual(len(view), len(dataset))
    self.assertEqual(view.keys, dataset.keys)
    self.assertEqual(view.mode, dict)
def test_concat(self):
    """Check ``concat`` of two dummy datasets and the examples it yields.

    Each source dataset gets a callback asserting the indices it is asked
    for (``expected_indices_a`` / ``expected_indices_b``) and that
    ``key_indices`` is ``None``.  The view must report the summed length,
    the keys and mode of the first dataset, and return data equal to the
    horizontally stacked dummy data (optionally indexed by
    ``self.indices``).
    """
    def callback_a(indices, key_indices):
        self.assertEqual(indices, self.expected_indices_a)
        self.assertIsNone(key_indices)

    def callback_b(indices, key_indices):
        self.assertEqual(indices, self.expected_indices_b)
        self.assertIsNone(key_indices)

    # Each dataset's key set is chosen from the *other* dataset's mode.
    keys_a = ('a', 'b', 'c') if self.mode_b else ('a', )
    dataset_a = dummy_dataset.DummyDataset(
        keys=keys_a,
        mode=self.mode_a,
        return_array=self.return_array,
        callback=callback_a,
        convert=True)

    keys_b = ('a', 'b', 'c') if self.mode_a else ('a', )
    dataset_b = dummy_dataset.DummyDataset(
        size=5,
        keys=keys_b,
        mode=self.mode_b,
        return_array=self.return_array,
        callback=callback_b)

    view = dataset_a.concat(dataset_b)

    self.assertIsInstance(view, chainer.dataset.TabularDataset)
    self.assertEqual(len(view), len(dataset_a) + len(dataset_b))
    self.assertEqual(view.keys, dataset_a.keys)
    self.assertEqual(view.mode, dataset_a.mode)

    output = view.get_examples(self.indices, None)

    expected = np.hstack((dataset_a.data, dataset_b.data))
    if self.indices is not None:
        expected = expected[:, self.indices]

    # Arrays are expected only when arrays were requested and exactly one
    # of the two expected-indices attributes is set on this test case.
    only_one_source = operator.xor(
        hasattr(self, 'expected_indices_a'),
        hasattr(self, 'expected_indices_b'))
    column_type = (
        np.ndarray if self.return_array and only_one_source else list)

    for column, reference in six.moves.zip_longest(output, expected):
        np.testing.assert_equal(column, reference)
        self.assertIsInstance(column, column_type)

    self.assertEqual(view.convert(output), 'converted')
def test_slice(self):
    """Check ``dataset.slice[...]`` views and the examples they return.

    When ``self.exception`` is set, building the slice must raise it and
    nothing else is checked.  Otherwise the view's length, keys and mode
    are compared with the expected values, and ``get_examples`` on the
    view must match the dummy data indexed the same way with NumPy.
    """
    def callback(indices, key_indices):
        wants_list = isinstance(self.indices, list) \
            or isinstance(self.get_examples_indices, list)
        if wants_list:
            self.assertIsInstance(indices, list)
        elif isinstance(self.indices, slice) \
                or isinstance(self.get_examples_indices, slice):
            self.assertIsInstance(indices, slice)
        else:
            self.assertIsNone(indices)

        no_key_selection = (
            self.keys is None and self.get_examples_key_indices is None)
        if no_key_selection:
            self.assertIsNone(key_indices)
        else:
            self.assertIsInstance(key_indices, tuple)

    dataset = dummy_dataset.DummyDataset(
        mode=self.mode, return_array=self.return_array, callback=callback)

    # ``slice[i, k]`` is the same subscript as ``slice[(i, k)]``.
    if self.keys is None:
        subscript = self.indices
    else:
        subscript = (self.indices, self.keys)

    if self.exception is not None:
        with self.assertRaises(self.exception):
            dataset.slice[subscript]
        return

    view = dataset.slice[subscript]

    if self.keys is None:
        rows = dataset.data
    else:
        # Keys may be names or already-integer positions.
        positions = {'a': 0, 'b': 1, 'c': 2}
        rows = dataset.data[[positions.get(key, key) for key in self.keys]]
    expected = rows[:, _indices_for_numpy(self.indices)]

    self.assertIsInstance(view, chainer.dataset.TabularDataset)
    self.assertEqual(len(view), self.expected_len)
    self.assertEqual(view.keys, self.expected_keys)
    self.assertEqual(view.mode, self.mode)

    output = view.get_examples(
        self.get_examples_indices, self.get_examples_key_indices)

    if self.get_examples_indices is not None:
        expected = expected[
            :, _indices_for_numpy(self.get_examples_indices)]
    if self.get_examples_key_indices is not None:
        expected = expected[list(self.get_examples_key_indices)]

    column_type = np.ndarray if self.return_array else list
    for column, reference in six.moves.zip_longest(output, expected):
        np.testing.assert_equal(column, reference)
        self.assertIsInstance(column, column_type)
def test_asdict(self):
    """Check that ``asdict()`` gives a dict-mode view of the same data.

    The view must keep the length, keys and examples of the source, report
    ``dict`` as its mode, and convert fetched data to ``'converted'``.
    """
    source = dummy_dataset.DummyDataset(mode=self.mode, convert=True)
    dict_view = source.asdict()

    self.assertIsInstance(dict_view, chainer.dataset.TabularDataset)
    self.assertEqual(len(dict_view), len(source))
    self.assertEqual(dict_view.keys, source.keys)
    self.assertEqual(dict_view.mode, dict)

    from_view = dict_view.get_examples(None, None)
    from_source = source.get_examples(None, None)
    self.assertEqual(from_view, from_source)

    converted = dict_view.convert(dict_view.fetch())
    self.assertEqual(converted, 'converted')
def test_delegate_dataset(self):
    """Check that ``DelegateDataset`` mirrors the dataset it wraps.

    Length, keys, mode and the example at index 3 are compared with the
    object exposed through the wrapper's ``dataset`` attribute.
    """
    delegate = tabular.DelegateDataset(
        dummy_dataset.DummyDataset(mode=self.mode))

    self.assertIsInstance(delegate, chainer.dataset.TabularDataset)

    wrapped = delegate.dataset
    self.assertEqual(len(delegate), len(wrapped))
    self.assertEqual(delegate.keys, wrapped.keys)
    self.assertEqual(delegate.mode, wrapped.mode)

    via_delegate = delegate.get_example(3)
    via_wrapped = wrapped.get_example(3)
    self.assertEqual(via_delegate, via_wrapped)
def test_transform_batch_length_changed(self):
    """Expect ``ValueError`` when a batch transform alters the batch length.

    The transform appends one extra element to column ``a`` while leaving
    ``b`` untouched, and ``get_examples`` on the view is expected to raise.
    """
    source = dummy_dataset.DummyDataset()

    def lengthen_first_column(a, b, c):
        # Output shape follows the parameterized mode; ``c`` is unused.
        if self.mode is dict:
            return {'a': a + [0], 'b': b}
        if self.mode is tuple:
            return a + [0], b

    broken_view = source.transform_batch(('a', 'b'), lengthen_first_column)

    with self.assertRaises(ValueError):
        broken_view.get_examples(None, None)
def test_join(self):
    """Check ``join`` of two dummy datasets with disjoint keys.

    Each source dataset gets a callback asserting that ``indices`` is
    ``None`` and that ``key_indices`` equals the expected value for that
    side.  The view must keep the length of the first dataset, expose the
    concatenated keys, and return data equal to the vertically stacked
    dummy data (optionally restricted by ``self.key_indices``).
    """
    def callback_a(indices, key_indices):
        self.assertIsNone(indices)
        self.assertEqual(key_indices, self.expected_key_indices_a)

    def callback_b(indices, key_indices):
        self.assertIsNone(indices)
        self.assertEqual(key_indices, self.expected_key_indices_b)

    dataset_a = dummy_dataset.DummyDataset(
        mode=self.mode_a,
        return_array=self.return_array,
        callback=callback_a,
        convert=True)
    dataset_b = dummy_dataset.DummyDataset(
        keys=('d', 'e'),
        mode=self.mode_b,
        return_array=self.return_array,
        callback=callback_b)

    view = dataset_a.join(dataset_b)

    self.assertIsInstance(view, chainer.dataset.TabularDataset)
    self.assertEqual(len(view), len(dataset_a))
    self.assertEqual(view.keys, dataset_a.keys + dataset_b.keys)
    # The first truthy mode wins; ``tuple`` is the fallback.
    self.assertEqual(
        view.mode, dataset_a.mode or dataset_b.mode or tuple)

    output = view.get_examples(None, self.key_indices)

    expected = np.vstack((dataset_a.data, dataset_b.data))
    if self.key_indices is not None:
        expected = expected[list(self.key_indices)]

    column_type = np.ndarray if self.return_array else list
    for column, reference in six.moves.zip_longest(output, expected):
        np.testing.assert_equal(column, reference)
        self.assertIsInstance(column, column_type)

    self.assertEqual(view.convert(output), 'converted')
def test_get_example(self):
    """Check ``get_example(3)`` against column 3 of the dummy data.

    The callback handed to the dataset asserts that the request arrives as
    ``indices == [3]`` with ``key_indices`` set to ``None``.  The expected
    example is built per ``self.mode``: a tuple, a dict keyed by
    ``'a'``/``'b'``/``'c'``, or the bare first-row value when the mode is
    ``None``.
    """
    def callback(indices, key_indices):
        self.assertEqual(indices, [3])
        self.assertIsNone(key_indices)

    dataset = dummy_dataset.DummyDataset(
        mode=self.mode, return_array=self.return_array, callback=callback)

    if self.mode is tuple:
        expected = tuple(dataset.data[:, 3])
    elif self.mode is dict:
        expected = dict(zip(('a', 'b', 'c'), dataset.data[:, 3]))
    elif self.mode is None:
        # Without this branch ``expected`` is unbound in ``None`` mode and
        # the test dies with a NameError instead of checking anything.
        expected = dataset.data[0, 3]

    self.assertEqual(dataset.get_example(3), expected)
def test_iter(self):
    """Check that iterating the dataset yields ten examples, then stops.

    Each yielded example is compared with the matching column of the dummy
    data, arranged per ``self.mode``; an eleventh ``next`` call must raise
    ``StopIteration``.
    """
    source = dummy_dataset.DummyDataset(
        mode=self.mode, return_array=self.return_array)
    examples = iter(source)

    for position in range(10):
        if self.mode is None:
            wanted = source.data[0, position]
        elif self.mode is tuple:
            wanted = tuple(source.data[:, position])
        elif self.mode is dict:
            wanted = dict(zip(('a', 'b', 'c'), source.data[:, position]))
        self.assertEqual(next(examples), wanted)

    # The iterator must be exhausted after the ten examples above.
    with self.assertRaises(StopIteration):
        next(examples)
def test_convert(self):
    """Check that ``convert`` turns fetched data into ``np.ndarray`` columns.

    The converted value must equal the dummy data arranged per
    ``self.mode``, and every column must be an ``np.ndarray`` whatever
    ``self.return_array`` is.
    """
    source = dummy_dataset.DummyDataset(
        mode=self.mode, return_array=self.return_array)
    converted = source.convert(source.fetch())

    if self.mode is tuple:
        wanted = tuple(source.data)
    elif self.mode is dict:
        wanted = dict(zip(('a', 'b', 'c'), source.data))
    elif self.mode is None:
        wanted = source.data[0]
    np.testing.assert_equal(converted, wanted)

    # Normalise the result to an iterable of columns before type checks.
    if self.mode is dict:
        columns = converted.values()
    elif self.mode is None:
        columns = (converted,)
    else:
        columns = converted

    for column in columns:
        self.assertIsInstance(column, np.ndarray)
def test_transform(self):
    """Check ``transform`` / ``transform_batch`` views and their output.

    The transform function verifies how its inputs arrive (positional in
    tuple mode, keyword in dict mode) and their element type (arrays for
    the batch variant, floats otherwise), then returns sums shaped by
    ``self.out_mode``.  The resulting view is checked for length, keys,
    mode, data and the container type of each returned column.
    """
    source = dummy_dataset.DummyDataset(
        mode=self.in_mode, return_array=True)

    def transform(*args, **kwargs):
        if self.in_mode is tuple:
            self.assertEqual(len(args), 3)
            self.assertEqual(len(kwargs), 0)
            a, b, c = args
        elif self.in_mode is dict:
            self.assertEqual(len(args), 0)
            self.assertEqual(len(kwargs), 3)
            a, b, c = kwargs['a'], kwargs['b'], kwargs['c']

        value_type = np.ndarray if self.with_batch else float
        for value in (a, b, c):
            self.assertIsInstance(value, value_type)

        if self.out_mode is tuple:
            return a + b, b + c
        if self.out_mode is dict:
            return {'alpha': a + b, 'beta': b + c}
        return a + b + c

    # Pick the batch or per-example entry point once.
    if self.with_batch:
        make_view = source.transform_batch
    else:
        make_view = source.transform

    if self.out_mode is None:
        view = make_view('alpha', transform)
        expected = source.data.sum(axis=0, keepdims=True)
        expected_keys = ('alpha', )
        expected_mode = tuple
    else:
        view = make_view(('alpha', 'beta'), transform)
        expected = np.vstack((
            source.data[0] + source.data[1],
            source.data[1] + source.data[2]))
        expected_keys = ('alpha', 'beta')
        expected_mode = self.out_mode

    self.assertIsInstance(view, chainer.dataset.TabularDataset)
    self.assertEqual(len(view), len(source))
    self.assertEqual(view.keys, expected_keys)
    self.assertEqual(view.mode, expected_mode)

    output = view.get_examples(self.indices, self.key_indices)

    if self.indices is not None:
        expected = expected[:, self.indices]
    if self.key_indices is not None:
        expected = expected[list(self.key_indices)]

    column_type = np.ndarray if self.with_batch else list
    for column, reference in six.moves.zip_longest(output, expected):
        np.testing.assert_equal(column, reference)
        self.assertIsInstance(column, column_type)
def test_transform_batch_inconsistent_mode(self):
    """Expect ``ValueError`` on the second of two identical batch requests.

    The view is built with ``self._transform``; the first ``get_examples``
    call must pass and the repeated call must raise.
    """
    source = dummy_dataset.DummyDataset()
    transformed = source.transform_batch(('a', 'b', 'c'), self._transform)

    # First request: must complete without raising.
    transformed.get_examples(None, None)

    # Second, identical request: must be rejected.
    with self.assertRaises(ValueError):
        transformed.get_examples(None, None)
def test_join_conflict_key(self):
    """Expect ``ValueError`` when joining with keys ``('a', 'd')``.

    The first dataset uses the default keys of ``DummyDataset``.
    """
    left = dummy_dataset.DummyDataset()
    right = dummy_dataset.DummyDataset(keys=('a', 'd'))

    with self.assertRaises(ValueError):
        left.join(right)
def test_join_length(self):
    """Expect ``ValueError`` when joining a default-size and a size-5 dataset.

    The second dataset uses keys ``('d', 'e')`` and ``size=5``; the first
    is built with the ``DummyDataset`` defaults.
    """
    left = dummy_dataset.DummyDataset()
    right = dummy_dataset.DummyDataset(size=5, keys=('d', 'e'))

    with self.assertRaises(ValueError):
        left.join(right)
def test_concat_key_order(self):
    """Expect ``ValueError`` when concatenating with keys ``('b', 'a', 'c')``.

    The first dataset uses the default keys of ``DummyDataset``.
    """
    first = dummy_dataset.DummyDataset()
    reordered = dummy_dataset.DummyDataset(keys=('b', 'a', 'c'))

    with self.assertRaises(ValueError):
        first.concat(reordered)