def test_iterate_seqs_no_chunking_1():
    # With chunking disabled (chunk_size=0, chunk_step=0), each of the two
    # sequences is expected to come out as one entry spanning all 11 frames.
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    seq_iter = dataset.iterate_seqs(chunk_size=0, chunk_step=0, used_data_keys=None)
    seqs = list(seq_iter)
    # Each entry: (seq-idx, start-frame, end-frame).
    expected = [
        (0, 0, 11),
        (1, 0, 11),
    ]
    assert_equal(len(seqs), 2)
    for idx, want in enumerate(expected):
        assert_equal(seqs[idx], want)
def test_generate_batches_recurrent():
    # Smoke test: walk through the whole recurrent batch generator one batch
    # at a time; only checks that peeking/advancing does not raise.
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=20)
    dataset.init_seq_order(1)
    batches = dataset.generate_batches(recurrent_net=True, max_seqs=2, batch_size=5)
    while True:
        if not batches.has_more():
            break
        batches.peek_next_n(1)
        batches.advance(1)
def test_hdf_create():
    # Dump a small DummyDataset into a freshly created HDF file.
    #
    # The HDF handle is closed and the temporary file removed in ``finally``
    # blocks, so a failing assertion or dump does not leak an open handle or
    # leave the file behind in the temp directory.
    #
    # NOTE: tempfile.mktemp is deprecated in the stdlib; it is kept here
    # because hdf_dataset_init is the one that creates the file.
    hdf_filename = tempfile.mktemp(suffix=".hdf", prefix="nose-dataset-create")
    try:
        hdf_dataset = hdf_dataset_init(hdf_filename)
        try:
            assert os.path.exists(hdf_filename)
            dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
            dataset.init_seq_order(epoch=1)
            hdf_dump_from_dataset(dataset, hdf_dataset, DictAsObj(options))
        finally:
            hdf_close(hdf_dataset)
    finally:
        # The file may not exist if hdf_dataset_init itself failed.
        if os.path.exists(hdf_filename):
            os.remove(hdf_filename)
def test_iterate_seqs_chunking_1():
    # With chunk_size=10 and chunk_step=5, each 11-frame sequence is expected
    # to be split into three (overlapping) chunks.
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    seqs = list(dataset.iterate_seqs(chunk_size=10, chunk_step=5, used_data_keys=None))
    for chunk in seqs:
        print(chunk)
    # Each entry: (seq-idx, start-frame, end-frame).
    expected = [
        (0, 0, 10),
        (0, 5, 11),
        (0, 10, 11),
        (1, 0, 10),
        (1, 5, 11),
        (1, 10, 11),
    ]
    assert_equal(len(seqs), 6)
    for idx, want in enumerate(expected):
        assert_equal(seqs[idx], want)
def test_hdf_create_unicode_labels():
    # Dump a DummyDataset whose "classes" labels contain non-ASCII characters
    # into a freshly created HDF file.
    #
    # The HDF handle is closed and the temporary file removed in ``finally``
    # blocks, so a failing assertion or dump does not leak an open handle or
    # leave the file behind in the temp directory.
    #
    # NOTE: tempfile.mktemp is deprecated in the stdlib; it is kept here
    # because hdf_dataset_init is the one that creates the file.
    hdf_filename = tempfile.mktemp(suffix=".hdf", prefix="nose-dataset-create")
    try:
        hdf_dataset = hdf_dataset_init(hdf_filename)
        try:
            assert os.path.exists(hdf_filename)
            dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
            assert "classes" in dataset.get_target_list()
            dataset.labels["classes"] = ['’', 'ä', 'x']  # have some Unicode chars here
            dataset.init_seq_order(epoch=1)
            hdf_dump_from_dataset(dataset, hdf_dataset, DictAsObj(options))
        finally:
            hdf_close(hdf_dataset)
    finally:
        # The file may not exist if hdf_dataset_init itself failed.
        if os.path.exists(hdf_filename):
            os.remove(hdf_filename)
def test_batches_context_window():
    # Recurrent batching with a context window: the "classes" stream is
    # chunked as usual, while the "data" stream of each chunk is expected to
    # be widened by the context on the left/right.
    context_window = 2
    ctx_lr = context_window - 1  # total extra frames (left + right)
    ctx_left = ctx_lr // 2
    ctx_right = ctx_lr - ctx_left
    dataset = DummyDataset(
        input_dim=2, output_dim=3, num_seqs=1, seq_len=11,
        context_window=context_window)
    dataset.init_seq_order(1)
    dataset.chunk_size = 5
    dataset.chunk_step = 5
    batch_gen = dataset.generate_batches(recurrent_net=True, max_seqs=1, batch_size=20)

    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        batch, = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # Each batch has 1 batch-slice (max_seqs) and up to 5 target frames (chunk_size).
    # The single 11-frame seq yields 3 chunks (chunk_step 5). Thus, 3 batches.
    assert_equal(len(all_batches), 3)
    b0, b1, b2 = all_batches
    for b in (b0, b1, b2):
        assert isinstance(b, Batch)

    # (start-frame, end-frame) of the "classes" stream for each chunk.
    class_ranges = [(0, 5), (5, 10), (10, 11)]
    for b, (cls_start, cls_end) in zip((b0, b1, b2), class_ranges):
        num_frames = cls_end - cls_start
        assert_equal(b.start_seq, 0)
        assert_equal(b.end_seq, 1)  # exclusive
        assert_equal(len(b.seqs), 1)  # 1 BatchSeqCopyPart
        assert_equal(b.seqs[0].seq_idx, 0)
        assert_equal(b.seqs[0].seq_start_frame["classes"], cls_start)
        assert_equal(b.seqs[0].seq_end_frame["classes"], cls_end)
        assert_equal(b.seqs[0].frame_length["classes"], num_frames)
        assert_equal(b.seqs[0].seq_start_frame["data"], cls_start - ctx_left)
        assert_equal(b.seqs[0].seq_end_frame["data"], cls_end + ctx_right)
        assert_equal(b.seqs[0].frame_length["data"], num_frames + ctx_lr)
        assert_equal(b.seqs[0].batch_slice, 0)
        assert_equal(b.seqs[0].batch_frame_offset, 0)
def test_batches_non_recurrent_1():
    # Non-recurrent batching: frames of consecutive seqs are concatenated, so
    # a batch may contain parts of more than one sequence.
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    batch_gen = dataset.generate_batches(recurrent_net=False, max_seqs=2, batch_size=5)

    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        batch, = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # Each batch will have 5 frames (batch_size), not more.
    # There are 2 * 11 frames in total, so 5 batches, because the 2 seqs are
    # concatenated in the non-recurrent case.
    assert_equal(len(all_batches), 5)

    # Per batch: (start_seq, end_seq (exclusive), parts), where each part
    # (one BatchSeqCopyPart) is
    # (seq_idx, seq_start_frame, seq_end_frame, frame_length, batch_slice, batch_frame_offset).
    expected = [
        (0, 1, [(0, 0, 5, 5, 0, 0)]),
        (0, 1, [(0, 5, 10, 5, 0, 0)]),
        # Third batch: the last frame of seq 0 plus the first 4 frames of seq 1.
        (0, 2, [(0, 10, 11, 1, 0, 0), (1, 0, 4, 4, 0, 1)]),
        (1, 2, [(1, 4, 9, 5, 0, 0)]),
        (1, 2, [(1, 9, 11, 2, 0, 0)]),
    ]
    for batch_idx, (start_seq, end_seq, parts) in enumerate(expected):
        got = all_batches[batch_idx]
        assert_equal(got.start_seq, start_seq)
        assert_equal(got.end_seq, end_seq)
        assert_equal(len(got.seqs), len(parts))
        for part_idx, part in enumerate(parts):
            seq_idx, start_frame, end_frame, length, batch_slice, frame_offset = part
            assert_equal(got.seqs[part_idx].seq_idx, seq_idx)
            assert_equal(got.seqs[part_idx].seq_start_frame, start_frame)
            assert_equal(got.seqs[part_idx].seq_end_frame, end_frame)
            assert_equal(got.seqs[part_idx].frame_length, length)
            assert_equal(got.seqs[part_idx].batch_slice, batch_slice)
            assert_equal(got.seqs[part_idx].batch_frame_offset, frame_offset)
def test_batches_recurrent_1():
    # Recurrent batching with chunking: with max_seqs=1, every chunk is
    # expected to end up in its own batch.
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    dataset.chunk_size = 10
    dataset.chunk_step = 5
    batch_gen = dataset.generate_batches(recurrent_net=True, max_seqs=1, batch_size=20)

    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        batch, = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # Each batch will have 1 batch-slice (max_seqs) and up to 10 frames (chunk_size).
    # For each seq, we get 3 chunks (chunk_step 5 for 11 frames).
    # Thus, 6 batches.
    assert_equal(len(all_batches), 6)

    # Only the first four batches are checked in detail.
    # Per batch: (start_seq, end_seq (exclusive), seq_idx,
    #             seq_start_frame, seq_end_frame, frame_length).
    expected = [
        (0, 1, 0, 0, 10, 10),
        (0, 1, 0, 5, 11, 6),
        (0, 1, 0, 10, 11, 1),
        (1, 2, 1, 0, 10, 10),
    ]
    for batch_idx, row in enumerate(expected):
        start_seq, end_seq, seq_idx, start_frame, end_frame, length = row
        got = all_batches[batch_idx]
        assert_equal(got.start_seq, start_seq)
        assert_equal(got.end_seq, end_seq)
        assert_equal(len(got.seqs), 1)  # 1 BatchSeqCopyPart
        assert_equal(got.seqs[0].seq_idx, seq_idx)
        assert_equal(got.seqs[0].seq_start_frame, start_frame)
        assert_equal(got.seqs[0].seq_end_frame, end_frame)
        assert_equal(got.seqs[0].frame_length, length)
        assert_equal(got.seqs[0].batch_slice, 0)
        assert_equal(got.seqs[0].batch_frame_offset, 0)
def test_combi_auto_enc_longer():
    # Train one step per sequence on a net with an "output" softmax layer and
    # an additional "auto-enc" layer, then repeat without "auto-enc" and
    # check that the summed "cost:output" is the same in both runs.
    config = Config()
    config.update({
        "multiprocessing": False,
        "blocking": True,
        "device": "cpu",
        "num_epochs": 1,
        "num_inputs": 3,
        "num_outputs": {"classes": 2},
        "learning_rate": 1.0,
        "adadelta": True,
        "network": {
            "output": {"class": "softmax", "loss": "ce", "target": "classes"},
            "auto-enc": {"class": "softmax", "loss": "sse", "dtype": "float32", "target": "data"},
        },
    })

    def get_net_params(with_auto_enc=True):
        # Fixed initial parameters, so that both runs start identically.
        params = {
            "output": {
                "W_in_data_output": numpy.arange(0.1, 0.7, 0.1, dtype="float32").reshape((3, 2)),
                "b_output": numpy.arange(0.0, 2, dtype="float32"),
            },
        }
        if not with_auto_enc:
            return params
        params["auto-enc"] = {
            "W_in_data_auto-enc": numpy.arange(0.1, 1.0, 0.1, dtype="float32").reshape((3, 3)),
            "b_auto-enc": numpy.arange(0.0, 3, dtype="float32"),
        }
        return params

    def train_all_seqs(dev, data, expect_auto_enc):
        # One train step per sequence; returns the summed "cost:output".
        total_cost = 0.0
        for seq_idx in range(data.num_seqs):
            # Copy to device allocation.
            success = assign_dev_data_single_seq(dev, data, seq_idx)
            assert_true(success, "failed to allocate & assign data")
            # One train step.
            dev.set_learning_rate(config.typed_value("learning_rate"))
            dev.run("train")
            output_list, outputs_format = dev.result()
            assert_is_instance(output_list, list)
            assert_true(outputs_format, "for train, we should always get the format")
            outputs = Device.make_result_dict(output_list, outputs_format)
            print("seq %i" % seq_idx)
            pprint(outputs)
            assert_in("cost:output", outputs)
            if expect_auto_enc:
                assert_in("cost:auto-enc", outputs)
            else:
                assert_not_in("cost:auto-enc", outputs)
            total_cost += outputs["cost:output"]
        return total_cost

    # First run: with the auto-enc layer.
    device = Device("cpu", config=config, blocking=True)
    device.trainnet.set_params_by_dict(get_net_params())
    device.testnet.set_params_by_dict(get_net_params())
    # Show params.
    for param in device.trainnet.get_all_params_vars():
        print("init %s:" % param)
        pprint(param.get_value())

    # Init dataset.
    dataset = DummyDataset(
        input_dim=config.typed_value("num_inputs"),
        output_dim=config.typed_value("num_outputs"),
        num_seqs=10)
    dataset.init_seq_order()
    cost_output_sum = train_all_seqs(device, dataset, expect_auto_enc=True)

    # Now, drop the auto-enc from the network, and redo the same thing.
    del config.typed_value("network")["auto-enc"]
    device = Device("cpu", config=config, blocking=True)
    device.trainnet.set_params_by_dict(get_net_params(with_auto_enc=False))
    device.testnet.set_params_by_dict(get_net_params(with_auto_enc=False))
    for param in device.trainnet.get_all_params_vars():
        print("second run, init %s:" % param)
        pprint(param.get_value())
    dataset.init_seq_order()  # reset
    cost2_output_sum = train_all_seqs(device, dataset, expect_auto_enc=False)

    assert_equal(cost_output_sum, cost2_output_sum)
    assert_almost_equal(cost_output_sum, 16.028842568397522, places=6)