def test_assign_dev_data(): config = Config() config.update(dummyconfig_dict) print("Create ExternSprintDataset") dataset = ExternSprintDataset( [sys.executable, sprintExecPath], "--*.feature-dimension=2 --*.trainer-output-dimension=3 --*.crnn-dataset=DummyDataset(2,3,num_seqs=4,seq_len=10)") dataset.init_seq_order(epoch=1) assert_true(dataset.is_less_than_num_seqs(0)) recurrent = False batch_generator = dataset.generate_batches(recurrent_net=recurrent, batch_size=5) batches = batch_generator.peek_next_n(2) assert_equal(len(batches), 2) if theano: print("Create Device") device = DummyDevice(config=config) success, num_batches = assign_dev_data(device, dataset, batches) assert_true(success) assert_equal(num_batches, len(batches))
def test_read_all(): config = Config() config.update(dummyconfig_dict) print("Create ExternSprintDataset") python2_exec = Util.which("python2") if python2_exec is None: raise unittest.SkipTest("python2 not found") num_seqs = 4 dataset = ExternSprintDataset( [python2_exec, sprintExecPath], "--*.feature-dimension=2 --*.trainer-output-dimension=3 " "--*.crnn-dataset=DummyDataset(2,3,num_seqs=%i,seq_len=10)" % num_seqs) dataset.init_seq_order(epoch=1) seq_idx = 0 while dataset.is_less_than_num_seqs(seq_idx): dataset.load_seqs(seq_idx, seq_idx + 1) for key in dataset.get_data_keys(): value = dataset.get_data(seq_idx, key) print("seq idx %i, data %r: %r" % (seq_idx, key, value)) seq_idx += 1 assert seq_idx == num_seqs
def test_assign_dev_data(): config = Config() config.update(dummyconfig_dict) print("Create ExternSprintDataset") dataset = ExternSprintDataset( [sys.executable, sprintExecPath], "--*.feature-dimension=2 --*.trainer-output-dimension=3 --*.crnn-dataset=DummyDataset(2,3,4)") dataset.init_seq_order(epoch=1) assert_true(dataset.is_less_than_num_seqs(0)) recurrent = False batch_generator = dataset.generate_batches(recurrent_net=recurrent, batch_size=512) batches = batch_generator.peek_next_n(2) assert_equal(len(batches), 2) print("Create Device") device = DummyDevice(config=config) success, num_batches = assign_dev_data(device, dataset, batches) assert_true(success) assert_equal(num_batches, len(batches))
def test_window(): input_dim = 2 output_dim = 3 num_seqs = 2 seq_len = 5 window = 3 dataset_kwargs = dict( sprintTrainerExecPath=[sys.executable, sprintExecPath], sprintConfigStr=" ".join([ "--*.feature-dimension=%i" % input_dim, "--*.trainer-output-dimension=%i" % output_dim, "--*.crnn-dataset=DummyDataset(input_dim=%i,output_dim=%i,num_seqs=%i,seq_len=%i)" % ( input_dim, output_dim, num_seqs, seq_len)])) dataset1 = ExternSprintDataset(**dataset_kwargs) dataset2 = ExternSprintDataset(window=window, **dataset_kwargs) try: dataset1.init_seq_order(epoch=1) dataset2.init_seq_order(epoch=1) assert_equal(dataset1.get_data_dim("data"), input_dim) assert_equal(dataset2.get_data_dim("data"), input_dim * window) data1 = dataset1.get_data(0, "data") data2 = dataset2.get_data(0, "data") assert_equal(data1.shape, (seq_len, input_dim)) assert_equal(data2.shape, (seq_len, window * input_dim)) data2a = data2.reshape(seq_len, window, input_dim) print("data1:") print(data1) print("data2:") print(data2) print("data1[0]:") print(data1[0]) print("data2[0]:") print(data2[0]) print("data2a[0,0]:") print(data2a[0, 0]) assert_equal(list(data2a[0, 0]), [0] * input_dim) # zero-padded left assert_equal(list(data2a[0, 1]), list(data1[0])) assert_equal(list(data2a[0, 2]), list(data1[1])) assert_equal(list(data2a[1, 0]), list(data1[0])) assert_equal(list(data2a[1, 1]), list(data1[1])) assert_equal(list(data2a[1, 2]), list(data1[2])) assert_equal(list(data2a[-1, 2]), [0] * input_dim) # zero-padded right finally: dataset1.exit_handler() dataset2.exit_handler()
def test_window(): input_dim = 2 output_dim = 3 num_seqs = 2 seq_len = 5 window = 3 dataset_kwargs = dict( sprintTrainerExecPath=[sys.executable, sprintExecPath], sprintConfigStr=" ".join([ "--*.feature-dimension=%i" % input_dim, "--*.trainer-output-dimension=%i" % output_dim, "--*.crnn-dataset=DummyDataset(input_dim=%i,output_dim=%i,num_seqs=%i,seq_len=%i)" % ( input_dim, output_dim, num_seqs, seq_len)])) dataset1 = ExternSprintDataset(**dataset_kwargs) dataset2 = ExternSprintDataset(window=window, **dataset_kwargs) try: dataset1.init_seq_order(epoch=1) dataset2.init_seq_order(epoch=1) dataset1.load_seqs(0, 1) dataset2.load_seqs(0, 1) assert_equal(dataset1.get_data_dim("data"), input_dim) assert_equal(dataset2.get_data_dim("data"), input_dim * window) data1 = dataset1.get_data(0, "data") data2 = dataset2.get_data(0, "data") assert_equal(data1.shape, (seq_len, input_dim)) assert_equal(data2.shape, (seq_len, window * input_dim)) data2a = data2.reshape(seq_len, window, input_dim) print("data1:") print(data1) print("data2:") print(data2) print("data1[0]:") print(data1[0]) print("data2[0]:") print(data2[0]) print("data2a[0,0]:") print(data2a[0, 0]) assert_equal(list(data2a[0, 0]), [0] * input_dim) # zero-padded left assert_equal(list(data2a[0, 1]), list(data1[0])) assert_equal(list(data2a[0, 2]), list(data1[1])) assert_equal(list(data2a[1, 0]), list(data1[0])) assert_equal(list(data2a[1, 1]), list(data1[1])) assert_equal(list(data2a[1, 2]), list(data1[2])) assert_equal(list(data2a[-1, 2]), [0] * input_dim) # zero-padded right finally: dataset1._exit_handler() dataset2._exit_handler()