def test_generator_len(self):
    source = PreLoadedSource([0] * 10, [0] * 10)
    adapter = AdapterMock()
    batch_generator = MiniBatchGenerator(source, adapter)
    self.assertEqual(len(batch_generator), len(source))

def fetch_strokes(source, num_strokes):
    cropped_strokes = []
    dummy_out = []
    for strokes, text in source.get_sequences():
        for stroke in strokes:
            # stop once num_strokes strokes have been collected
            # (>= rather than > avoids returning one extra stroke)
            if len(cropped_strokes) >= num_strokes:
                return PreLoadedSource(cropped_strokes, dummy_out)

            try:
                deltas = stroke.stroke_to_points()
            except BadStrokeException:
                continue

            cropped_strokes.append(deltas)
            dummy_out.append('')

    return PreLoadedSource(cropped_strokes, dummy_out)

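# Usage sketch for fetch_strokes (illustration only): _FakeStroke is a
# hypothetical stand-in for a real stroke object exposing stroke_to_points();
# only PreLoadedSource and BadStrokeException come from this code base.
class _FakeStroke:
    def __init__(self, deltas):
        self._deltas = deltas

    def stroke_to_points(self):
        # a real stroke would convert its raw points to deltas here
        return self._deltas


def _fetch_strokes_example():
    raw = PreLoadedSource([[_FakeStroke([(0, 0), (1, 1)])]], ['label'])
    stroke_source = fetch_strokes(raw, num_strokes=10)
    # each returned sequence pairs the stroke deltas with an empty dummy target
    return list(stroke_source.get_sequences())
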
def normalized_source(source, normalizer):
    seqs_in = []
    seqs_out = []
    for seq_in, seq_out in source.get_sequences():
        seqs_in.append(seq_in)
        seqs_out.append(seq_out)

    processed = normalizer.preprocess(seqs_in)
    return PreLoadedSource(processed, seqs_out)

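# Usage sketch for normalized_source (illustration only): _IdentityNormalizer
# is a hypothetical normalizer satisfying the preprocess() contract the
# function relies on; a real normalizer would be fitted on training data first.
class _IdentityNormalizer:
    def preprocess(self, seqs):
        # a real implementation would scale or offset the sequences
        return seqs


def _normalized_source_example():
    src = PreLoadedSource([[1, 2], [3, 4]], ['a', 'b'])
    normalized = normalized_source(src, _IdentityNormalizer())
    return list(normalized.get_sequences())
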
def test_data_processing_calls(self):
    preprocessor = PreprocessorMock([])
    provider = PreLoadedSource([2, 4], ["two four"])
    repo = DataRepoMock()
    # DataSplitterMock ignores the provider and returns fixed
    # train/val/test slices: (1, '1'), (2, '2'), (3, '3')
    splitter = DataSplitterMock(provider)
    compiler = DataSetCompiler(preprocessor, splitter, repo)
    compiler.compile()

    self.assertEqual(preprocessor.processed_args,
                     [(1, '1'), (2, '2'), (3, '3')])

def test_preprocessor_fits_training_data(self):
    preprocessor = PreprocessorMock([])
    provider = PreLoadedSource([2, 4], ["two four"])
    repo = DataRepoMock()
    splitter = DataSplitterMock(provider)
    compiler = DataSetCompiler(preprocessor, splitter, repo)
    compiler.compile()

    self.assertEqual(list(preprocessor.fit_arg.get_sequences()), [(1, '1')])

def test_fit_and_process_example(self):
    preprocessor = PreProcessor(steps=[DummyStep(), DummyStep()])
    xs = [1, 2]
    ys = [0, 0]
    source = PreLoadedSource(xs, ys)
    preprocessor.fit(source)

    x, y = preprocessor.pre_process_example(1, 2)
    self.assertEqual(x, 13)
    self.assertEqual(y, 2)

def test_compiler_splits_data(self):
    preprocessor = PreprocessorMock([])
    x = [1, 2, 3]
    y = ['one', 'two', 'three']
    provider = PreLoadedSource(x, y)
    repo = DataRepoMock()
    splitter = DataSplitterMock(provider)
    compiler = DataSetCompiler(preprocessor, splitter, repo)
    compiler.compile()

    self.assertTrue(splitter.called)

def dummy_source():
    sin = 'HHHH eee lll lll ooo ,,, www oooo rrr lll ddd'
    sout = 'Hello, world'

    char_table = CharacterTable()
    codes = [char_table.encode(ch) for ch in sin]
    # one-hot encode and add a batch dimension: (1, len(sin), num_classes)
    x = to_categorical(codes, num_classes=len(char_table))
    x = x.reshape(1, len(sin), -1)
    return PreLoadedSource(x, [sout])

def test_values_in_mini_batches(self):
    num_examples = 1
    source = PreLoadedSource([0] * num_examples, [0] * num_examples)
    adapter = AdapterMock()
    batch_size = 2
    batch_generator = MiniBatchGenerator(source, adapter,
                                         batch_size=batch_size)

    # the generator yields batches indefinitely, so stop after enough
    # batches to cover every example once
    batches = []
    for mini_batch in batch_generator.get_examples():
        if len(batches) >= len(batch_generator) // batch_size + 1:
            break
        batches.append(mini_batch)

    self.assertEqual(batches[0][0], [0])
    self.assertEqual(batches[0][1], [0])

def test_repo(self):
    preprocessor = PreProcessor([])
    provider = PreLoadedSource([1, 2, 3], ['1', '2', '3'])
    repo = DataRepoMock()
    splitter = DataSplitterMock(provider)
    compiler = DataSetCompiler(preprocessor, splitter, repo)
    compiler.compile()

    self.assertEqual(len(repo.slices), 3)

    train, val, test = repo.slices
    self.assertEqual(len(train), 1)
    self.assertEqual(len(val), 1)
    self.assertEqual(len(test), 1)

    self.assertEqual(train, [(1, '1')])
    self.assertEqual(val, [(2, '2')])
    self.assertEqual(test, [(3, '3')])

def test_data(self):
    return PreLoadedSource([3], ['3'])

def validation_data(self):
    return PreLoadedSource([2], ['2'])

def train_data(self):
    return PreLoadedSource([1], ['1'])