def test_construct_batches(self):
    """Check the batches and ordering returned by ``dp.construct_batches``.

    Five raw sample lines holding 3, 1, 3, 1 and 2 timesteps are expected
    to come back as three batches (the 3-, 2- and 1-timestep samples, in
    that order) together with the original sample indices in batch order.
    """
    lines = [
        "1,2,3,4|0,1;2,3,4,5|1,0; 2,2,2,2|0, 0;",
        "1, 2, 3, 0|0, 0;",
        "1,2,3,4|0,1;2,3,4,5|1,0; 2,2,2,1|0, 0;",
        "1, 2, 3, 0|0, 0;",
        "2,3,4,5|1,1;2,2,3,4|0,0;",
    ]
    data = dp.strip_split_samples(lines)
    len_input, len_output, splitter = dp.get_timestep_split(data[0][0])
    batches, original_order = dp.construct_batches(
        data, len_output, len_input, splitter, True)

    order = (0, 2, 4, 1, 3)
    # 2 samples with 3 timesteps each
    batch0 = np.array([[[1.0, 2.0, 3.0, 4.0, 0.0, 1.0],
                        [2.0, 3.0, 4.0, 5.0, 1.0, 0.0],
                        [2.0, 2.0, 2.0, 2.0, 0.0, 0.0]],
                       [[1.0, 2.0, 3.0, 4.0, 0.0, 1.0],
                        [2.0, 3.0, 4.0, 5.0, 1.0, 0.0],
                        [2.0, 2.0, 2.0, 1.0, 0.0, 0.0]]])
    # 1 sample with 2 timesteps each
    batch1 = np.array([[[2.0, 3.0, 4.0, 5.0, 1.0, 1.0],
                        [2.0, 2.0, 3.0, 4.0, 0.0, 0.0]]])
    # 2 samples with 1 timestep each
    batch2 = np.array([[[1.0, 2.0, 3.0, 0.0, 0.0, 0.0]],
                       [[1.0, 2.0, 3.0, 0.0, 0.0, 0.0]]])

    self.assertEqual(order, original_order)
    # Compare whole arrays: reducing an element-wise ``==`` with ``.any()``
    # would pass as soon as a single value matched, hiding wrong batches.
    np.testing.assert_array_equal(batches[0], batch0)
    np.testing.assert_array_equal(batches[1], batch1)
    np.testing.assert_array_equal(batches[2], batch2)
def test_sample_preprocess_binarize(self):
    """Check ``dp.sample_preprocess`` output when its last argument is False.

    Each input value is expected to be followed by a ``0`` entry, while the
    output values after the ``|`` are kept as plain strings.
    """
    raw_sample = ["1,2,3|0,1", "2,3,4|1,0"]
    expected = [
        ['1', 0, '2', 0, '3', 0, '0', '1'],
        ['2', 0, '3', 0, '4', 0, '1', '0'],
    ]

    n_in, n_out, split_fn = dp.get_timestep_split(raw_sample[0])
    processed = dp.sample_preprocess(raw_sample, n_out, n_in, split_fn, False)

    self.assertEqual(processed, expected)
def test_timestep_split_0_out(self):
    """Check ``dp.get_timestep_split`` on a timestep with no ``|`` section.

    Six inputs and zero outputs are expected, and the returned splitter
    should yield all six values as inputs with an empty output list.
    """
    raw_timestep = "1,2,3,4,1,0"
    n_in, n_out, split_fn = dp.get_timestep_split(raw_timestep)

    # Reported lengths first, then the actual split produced by the splitter.
    self.assertEqual(n_in, 6)
    self.assertEqual(n_out, 0)

    inputs, outputs = split_fn(raw_timestep, 'fake')
    self.assertEqual(inputs, ['1', '2', '3', '4', '1', '0'])
    self.assertEqual(outputs, [])
def test_sample_preprocess(self):
    """Check ``dp.sample_preprocess`` output when its last argument is True.

    Every timestep is expected to become one flat list of its input and
    output values as strings, with any whitespace in the raw text preserved.
    """
    raw_sample = ["1,2,3,4|0,1", "2,3,4,5|1,0", " 2,2,2,2|0, 0"]
    expected = [
        ['1', '2', '3', '4', '0', '1'],
        ['2', '3', '4', '5', '1', '0'],
        [' 2', '2', '2', '2', '0', ' 0'],
    ]

    n_in, n_out, split_fn = dp.get_timestep_split(raw_sample[0])
    processed = dp.sample_preprocess(raw_sample, n_out, n_in, split_fn, True)

    self.assertEqual(processed, expected)
def test_datafile_bin(self):
    """Check batch shapes built from the ``fixtures/manualx.zip`` data file.

    Three batches are expected, with a last axis of size 6 on the first
    batch, second-axis sizes of 97, 96 and 95, and 32 samples overall.
    """
    datafile = dp.get_data('fixtures/manualx.zip')
    raw_lines = dp.open_datafile(datafile)
    samples = dp.strip_split_samples(raw_lines)
    n_in, n_out, split_fn = dp.get_timestep_split(samples[0][0])
    batches, original_order = dp.construct_batches(
        samples, n_out, n_in, split_fn, False)

    self.assertEqual(len(batches), 3)
    self.assertEqual(batches[0].shape[2], 6)

    # Second-axis size expected for each batch, in batch order.
    for index, expected_steps in enumerate((97, 96, 95)):
        self.assertEqual(batches[index].shape[1], expected_steps)

    total_samples = sum(batch.shape[0] for batch in batches)
    self.assertEqual(total_samples, 32)