Python DummyDatasetの例、returnn.datasets.generating.DummyDataset Pythonの例

コード例 #1

0

ファイルを表示

def test_iterate_seqs_no_chunking_1():
    """Check that iterate_seqs with chunking disabled yields one full span per seq."""
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    seq_iter = dataset.iterate_seqs(
        chunk_size=0, chunk_step=0, used_data_keys=None)
    seqs = list(seq_iter)

    # Each entry is (seq-idx, start-frame, end-frame).
    expected = [(0, 0, 11), (1, 0, 11)]
    assert_equal(len(seqs), 2)
    for pos, want in enumerate(expected):
        assert_equal(seqs[pos], want)

コード例 #2

0

ファイルを表示

def test_generate_batches_recurrent():
    """Smoke test: drain the recurrent batch generator one batch at a time."""
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=20)
    dataset.init_seq_order(1)
    batch_options = dict(recurrent_net=True, max_seqs=2, batch_size=5)
    batch_gen = dataset.generate_batches(**batch_options)

    # Nothing is asserted about the batches; we only peek and advance
    # until the generator reports that it is exhausted.
    step = 1
    while batch_gen.has_more():
        batch_gen.peek_next_n(step)
        batch_gen.advance(step)

コード例 #3

0

ファイルを表示

def test_hdf_create():
    """Dump a small DummyDataset into a freshly created HDF file.

    The HDF handle is always closed and the temporary file is always removed,
    even when one of the steps in between raises, so a failing run does not
    leave an open handle or a stray file behind.
    """
    # NOTE(review): tempfile.mktemp only picks a name and is documented as
    # deprecated/race-prone; it is kept because hdf_dataset_init presumably
    # creates the file itself (the assert below relies on that) -- confirm
    # before switching to mkstemp/NamedTemporaryFile.
    hdf_filename = tempfile.mktemp(suffix=".hdf", prefix="nose-dataset-create")
    try:
        hdf_dataset = hdf_dataset_init(hdf_filename)
        try:
            assert os.path.exists(hdf_filename)

            dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
            dataset.init_seq_order(epoch=1)

            hdf_dump_from_dataset(dataset, hdf_dataset, DictAsObj(options))
        finally:
            # Close before removing, also on failure.
            hdf_close(hdf_dataset)
    finally:
        # Guarded so that a missing file does not mask the original error.
        if os.path.exists(hdf_filename):
            os.remove(hdf_filename)

コード例 #4

0

ファイルを表示

def test_iterate_seqs_chunking_1():
    """Check the chunks produced for chunk_size=10, chunk_step=5 on two 11-frame seqs."""
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    seq_iter = dataset.iterate_seqs(
        chunk_size=10, chunk_step=5, used_data_keys=None)
    seqs = list(seq_iter)
    for chunk in seqs:
        print(chunk)

    # Each entry is (seq-idx, start-frame, end-frame); three chunks per seq.
    expected = [
        (0, 0, 10),
        (0, 5, 11),
        (0, 10, 11),
        (1, 0, 10),
        (1, 5, 11),
        (1, 10, 11),
    ]
    assert_equal(len(seqs), 6)
    for pos, want in enumerate(expected):
        assert_equal(seqs[pos], want)

コード例 #5

0

ファイルを表示

def test_hdf_create_unicode_labels():
    """Dump a DummyDataset whose "classes" labels contain non-ASCII characters.

    The HDF handle is always closed and the temporary file is always removed,
    even when one of the steps in between raises, so a failing run does not
    leave an open handle or a stray file behind.
    """
    # NOTE(review): tempfile.mktemp only picks a name and is documented as
    # deprecated/race-prone; it is kept because hdf_dataset_init presumably
    # creates the file itself (the assert below relies on that) -- confirm
    # before switching to mkstemp/NamedTemporaryFile.
    hdf_filename = tempfile.mktemp(suffix=".hdf", prefix="nose-dataset-create")
    try:
        hdf_dataset = hdf_dataset_init(hdf_filename)
        try:
            assert os.path.exists(hdf_filename)

            dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
            assert "classes" in dataset.get_target_list()
            dataset.labels["classes"] = ['’', 'ä', 'x']  # have some Unicode chars here
            dataset.init_seq_order(epoch=1)

            hdf_dump_from_dataset(dataset, hdf_dataset, DictAsObj(options))
        finally:
            # Close before removing, also on failure.
            hdf_close(hdf_dataset)
    finally:
        # Guarded so that a missing file does not mask the original error.
        if os.path.exists(hdf_filename):
            os.remove(hdf_filename)

コード例 #6

0

ファイルを表示

def test_batches_context_window():
    """Check recurrent batches of one 11-frame seq with chunking and a context window.

    With chunk_size = chunk_step = 5 the seq is expected to split into the
    chunks [0, 5), [5, 10) and [10, 11), one batch each. The "classes" frames
    must match the chunk exactly, while the "data" frames are expected to be
    widened by the context window on both sides.
    """
    context_window = 2
    # Extra frames around the center frame, split into a left and a right part.
    ctx_lr = context_window - 1
    ctx_left = ctx_lr // 2
    ctx_right = ctx_lr - ctx_left

    dataset = DummyDataset(
        input_dim=2,
        output_dim=3,
        num_seqs=1,
        seq_len=11,
        context_window=context_window)
    dataset.init_seq_order(1)
    dataset.chunk_size = 5
    dataset.chunk_step = 5
    batch_gen = dataset.generate_batches(
        recurrent_net=True, max_seqs=1, batch_size=20)

    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        (batch,) = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # Each batch has 1 batch-slice (max_seqs) and up to 5 frames (chunk_size).
    # The single 11-frame seq gives 3 chunks (chunk_step 5), thus 3 batches.
    assert_equal(len(all_batches), 3)
    for cur in all_batches:
        assert isinstance(cur, Batch)

    # (start-frame, end-frame) of the "classes" stream for each batch.
    chunk_bounds = [(0, 5), (5, 10), (10, 11)]
    for cur, (start, end) in zip(all_batches, chunk_bounds):
        num_frames = end - start
        assert_equal(cur.start_seq, 0)
        assert_equal(cur.end_seq, 1)  # exclusive
        assert_equal(len(cur.seqs), 1)  # 1 BatchSeqCopyPart
        part = cur.seqs[0]
        assert_equal(part.seq_idx, 0)
        assert_equal(part.seq_start_frame["classes"], start)
        assert_equal(part.seq_end_frame["classes"], end)
        assert_equal(part.frame_length["classes"], num_frames)
        assert_equal(part.seq_start_frame["data"], start - ctx_left)
        assert_equal(part.seq_end_frame["data"], end + ctx_right)
        assert_equal(part.frame_length["data"], num_frames + ctx_lr)
        assert_equal(part.batch_slice, 0)
        assert_equal(part.batch_frame_offset, 0)

コード例 #7

0

ファイルを表示

def test_batches_non_recurrent_1():
    """Check non-recurrent batching of two 11-frame seqs with batch_size=5."""
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    batch_gen = dataset.generate_batches(
        recurrent_net=False, max_seqs=2, batch_size=5)

    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        (batch,) = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # A batch holds at most 5 frames (batch_size). In the non-recurrent case
    # the 2 seqs are concatenated, so the 2 * 11 frames give 5 batches, and
    # the third batch spans the boundary between seq 0 and seq 1.
    #
    # Per batch: (start_seq, end_seq (exclusive), parts), where each part is
    # (seq_idx, seq_start_frame, seq_end_frame, frame_length,
    #  batch_slice, batch_frame_offset) of one BatchSeqCopyPart.
    expected = [
        (0, 1, [(0, 0, 5, 5, 0, 0)]),
        (0, 1, [(0, 5, 10, 5, 0, 0)]),
        (0, 2, [(0, 10, 11, 1, 0, 0), (1, 0, 4, 4, 0, 1)]),
        (1, 2, [(1, 4, 9, 5, 0, 0)]),
        (1, 2, [(1, 9, 11, 2, 0, 0)]),
    ]
    assert_equal(len(all_batches), 5)

    for cur, (start_seq, end_seq, parts) in zip(all_batches, expected):
        assert_equal(cur.start_seq, start_seq)
        assert_equal(cur.end_seq, end_seq)
        assert_equal(len(cur.seqs), len(parts))
        for pos, want in enumerate(parts):
            seq_idx, start, end, length, slice_idx, offset = want
            part = cur.seqs[pos]
            assert_equal(part.seq_idx, seq_idx)
            assert_equal(part.seq_start_frame, start)
            assert_equal(part.seq_end_frame, end)
            assert_equal(part.frame_length, length)
            assert_equal(part.batch_slice, slice_idx)
            assert_equal(part.batch_frame_offset, offset)

コード例 #8

0

ファイルを表示

def test_batches_recurrent_1():
    """Check recurrent batching of two 11-frame seqs with chunk_size=10, chunk_step=5.

    All six expected batches are verified: three chunks per seq
    ([0, 10), [5, 11), [10, 11)), each chunk in its own batch because
    max_seqs is 1.
    """
    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
    dataset.init_seq_order(1)
    dataset.chunk_size = 10
    dataset.chunk_step = 5
    batch_gen = dataset.generate_batches(recurrent_net=True,
                                         max_seqs=1,
                                         batch_size=20)
    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        batch, = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # Each batch will have 1 batch-slice (max_seqs) and up to 10 frames (chunk_size).
    # For each seq, we get 3 chunks (chunk_step 5 for 11 frames).
    # Thus, 6 batches.
    #
    # Per batch: (seq_idx, seq_start_frame, seq_end_frame) of its single part.
    expected = [
        (0, 0, 10),
        (0, 5, 11),
        (0, 10, 11),
        (1, 0, 10),
        (1, 5, 11),
        (1, 10, 11),
    ]
    assert_equal(len(all_batches), 6)

    for cur, (seq_idx, start, end) in zip(all_batches, expected):
        assert_equal(cur.start_seq, seq_idx)
        assert_equal(cur.end_seq, seq_idx + 1)  # exclusive
        assert_equal(len(cur.seqs), 1)  # 1 BatchSeqCopyPart
        part = cur.seqs[0]
        assert_equal(part.seq_idx, seq_idx)
        assert_equal(part.seq_start_frame, start)
        assert_equal(part.seq_end_frame, end)
        assert_equal(part.frame_length, end - start)
        assert_equal(part.batch_slice, 0)
        assert_equal(part.batch_frame_offset, 0)

コード例 #9

0

ファイルを表示

ファイル: test_multi_target.py プロジェクト: vieting/returnn

def test_combi_auto_enc_longer():
    """Train on 10 seqs with and without an extra "auto-enc" layer and compare.

    The first run uses a network with two layers, "output" (ce loss on
    "classes") and "auto-enc" (sse loss on "data"). The second run deletes
    "auto-enc" from the config and repeats the same per-seq train steps with
    the same initial "output" params. The test asserts that the summed
    "cost:output" is identical in both runs and close to a fixed reference
    value.
    """
    config = Config()
    config.update({
        "multiprocessing": False,
        "blocking": True,
        "device": "cpu",
        "num_epochs": 1,
        "num_inputs": 3,
        "num_outputs": {
            "classes": 2
        },
        "learning_rate": 1.0,
        "adadelta": True,
        "network": {
            "output": {
                "class": "softmax",
                "loss": "ce",
                "target": "classes"
            },
            "auto-enc": {
                "class": "softmax",
                "loss": "sse",
                "dtype": "float32",
                "target": "data"
            }
        }
    })

    device = Device("cpu", config=config, blocking=True)

    # Set net params.
    # Builds fixed (non-random) float32 params per layer: a (3, 2) weight
    # matrix and a length-2 bias for "output", and optionally a (3, 3) weight
    # matrix and a length-3 bias for "auto-enc".
    def get_net_params(with_auto_enc=True):
        d = {
            "output": {
                "W_in_data_output":
                numpy.arange(0.1, 0.7, 0.1, dtype="float32").reshape((3, 2)),
                "b_output":
                numpy.arange(0.0, 2, dtype="float32")
            }
        }
        if with_auto_enc:
            d["auto-enc"] = {
                "W_in_data_auto-enc":
                numpy.arange(0.1, 1.0, 0.1, dtype="float32").reshape((3, 3)),
                "b_auto-enc":
                numpy.arange(0.0, 3, dtype="float32")
            }
        return d

    # Both the train net and the test net get the same initial params.
    device.trainnet.set_params_by_dict(get_net_params())
    device.testnet.set_params_by_dict(get_net_params())

    # Show params.
    for p in device.trainnet.get_all_params_vars():
        print("init %s:" % p)
        pprint(p.get_value())

    # Init dataset.
    dataset = DummyDataset(input_dim=config.typed_value("num_inputs"),
                           output_dim=config.typed_value("num_outputs"),
                           num_seqs=10)
    dataset.init_seq_order()

    # First run: accumulate the "output" layer cost over all seqs.
    cost_output_sum = 0.0
    for seq_idx in range(dataset.num_seqs):
        # Copy to device allocation.
        success = assign_dev_data_single_seq(device, dataset, seq_idx)
        assert_true(success, "failed to allocate & assign data")

        # One train step.
        device.set_learning_rate(config.typed_value("learning_rate"))
        device.run("train")
        output_list, outputs_format = device.result()
        assert_is_instance(output_list, list)
        assert_true(outputs_format,
                    "for train, we should always get the format")
        outputs = Device.make_result_dict(output_list, outputs_format)
        print(("seq %i" % seq_idx))
        pprint(outputs)
        # With both layers present, both costs must be reported.
        assert_in("cost:output", outputs)
        assert_in("cost:auto-enc", outputs)
        cost_output_sum += outputs["cost:output"]

    # Now, drop the auto-enc from the network, and redo the same thing.
    # This mutates the "network" dict held by the config in place; a new
    # Device is then created from the modified config.
    del config.typed_value("network")["auto-enc"]
    device = Device("cpu", config=config, blocking=True)
    device.trainnet.set_params_by_dict(get_net_params(with_auto_enc=False))
    device.testnet.set_params_by_dict(get_net_params(with_auto_enc=False))
    for p in device.trainnet.get_all_params_vars():
        print("second run, init %s:" % p)
        pprint(p.get_value())
    dataset.init_seq_order()  # reset

    # Second run: same steps as above, now without the "auto-enc" layer.
    cost2_output_sum = 0.0
    for seq_idx in range(dataset.num_seqs):
        # Copy to device allocation.
        success = assign_dev_data_single_seq(device, dataset, seq_idx)
        assert_true(success, "failed to allocate & assign data")

        # One train step.
        device.set_learning_rate(config.typed_value("learning_rate"))
        device.run("train")
        output_list, outputs_format = device.result()
        assert_is_instance(output_list, list)
        assert_true(outputs_format,
                    "for train, we should always get the format")
        outputs = Device.make_result_dict(output_list, outputs_format)
        print(("seq %i" % seq_idx))
        pprint(outputs)
        # The removed layer must no longer report a cost.
        assert_in("cost:output", outputs)
        assert_not_in("cost:auto-enc", outputs)
        cost2_output_sum += outputs["cost:output"]

    # Exact equality: the "output" cost sum must not depend on whether the
    # "auto-enc" layer was present.
    assert_equal(cost_output_sum, cost2_output_sum)
    # Fixed reference value for the summed "output" cost (6 decimal places).
    assert_almost_equal(cost_output_sum, 16.028842568397522, places=6)