Example #1
def test_engine_analyze():
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    config = Config()
    config.update({
        "model": "/tmp/model",
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "output": {
                "class": "softmax",
                "loss": "ce"
            }
        },
        "sil_label_idx": 0,
    })
    engine = Engine(config=config)
    # Normally init_network_from_config but that requires an existing network model.
    # engine.init_network_from_config(config=config)
    engine.init_train_from_config(config=config,
                                  train_data=dataset,
                                  dev_data=None,
                                  eval_data=None)

    engine.analyze(data=dataset, statistics=None)
Example #2
def test_engine_forward_single():
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    config = Config()
    config.update({
        "model": "/tmp/model",
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "output": {
                "class": "softmax",
                "loss": "ce"
            }
        }
    })
    engine = Engine(config=config)
    engine.init_train_from_config(config=config,
                                  train_data=dataset,
                                  dev_data=None,
                                  eval_data=None)

    engine.forward_single(dataset=dataset, seq_idx=0)
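
Note on forward_single: it runs the network on a single sequence and returns the output time-major, without a batch axis (compare the rec-subnet counting example below, which asserts out.shape == (seq_len, 1)). A hedged sketch of what one would expect here for the softmax output:

out = engine.forward_single(dataset=dataset, seq_idx=0)
assert out.shape == (seq_len, n_classes_dim)  # (time, dim)
assert numpy.allclose(out.sum(axis=1), 1.0)   # softmax: each frame sums to 1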
Example #3
def test_iterate_seqs_no_chunking_1():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
  dataset.init_seq_order(1)
  seqs = list(dataset._iterate_seqs(chunk_size=0, chunk_step=0))
  assert_equal(len(seqs), 2)
  assert_equal(seqs[0], (0, 0, 11))  # seq-idx, start-frame, end-frame
  assert_equal(seqs[1], (1, 0, 11))
Example #4
def test_generate_batches_recurrent():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=20)
  dataset.init_seq_order(1)
  batch_gen = dataset.generate_batches(recurrent_net=True, max_seqs=2, batch_size=5)
  while batch_gen.has_more():
    batch_gen.peek_next_n(1)
    batch_gen.advance(1)
Example #5
def test_engine_forward_to_hdf():
    from GeneratingDataset import DummyDataset
    import tempfile
    output_file = tempfile.mktemp(suffix=".hdf", prefix="nose-tf-forward")
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    num_seqs = 20
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=num_seqs,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    config = Config()
    config.update({
        "model": "/tmp/model",
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "output": {
                "class": "softmax",
                "loss": "ce"
            }
        },
        "output_file": output_file,
    })

    engine = Engine(config=config)
    engine.init_train_from_config(
        config=config,
        train_data=dataset,
        dev_data=None,
        eval_data=None,
    )

    engine.forward_to_hdf(data=dataset, output_file=output_file, batch_size=5)
    assert os.path.exists(output_file)
    import h5py
    with h5py.File(output_file, 'r') as f:
        assert f['inputs'].shape == (seq_len * num_seqs, n_classes_dim)
        assert f['seqLengths'].shape == (num_seqs, 2)
        assert f['seqTags'].shape == (num_seqs, )
        assert f.attrs['inputPattSize'] == n_data_dim
        assert f.attrs['numSeqs'] == num_seqs
        assert f.attrs['numTimesteps'] == seq_len * num_seqs

    from HDFDataset import HDFDataset
    ds = HDFDataset()
    ds.add_file(output_file)

    assert_equal(ds.num_inputs,
                 n_classes_dim)  # forwarded input is network output
    assert_equal(ds.get_num_timesteps(), seq_len * num_seqs)
    assert_equal(ds.num_seqs, num_seqs)

    os.remove(output_file)
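
Since 'inputs' stores all sequences concatenated along the time axis (that is what the (seq_len * num_seqs, n_classes_dim) shape implies), individual sequences can be recovered via 'seqLengths'. A minimal sketch, assuming column 0 of seqLengths holds the input length of each sequence:

import h5py
import numpy
with h5py.File(output_file, 'r') as f:
    lengths = f['seqLengths'][:, 0]                # input length per sequence
    offsets = numpy.concatenate([[0], numpy.cumsum(lengths)])
    seq1 = f['inputs'][offsets[1]:offsets[2]]      # frames of the second sequence
    assert seq1.shape == (seq_len, n_classes_dim)  # here: (5, 3)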
Example #6
def test_batches_non_recurrent_1():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
  dataset.init_seq_order(1)
  batch_gen = dataset.generate_batches(recurrent_net=False, max_seqs=2, batch_size=5)
  all_batches = []  # type: list[Batch]
  while batch_gen.has_more():
    batch, = batch_gen.peek_next_n(1)
    assert_is_instance(batch, Batch)
    all_batches.append(batch)
    batch_gen.advance(1)

  # Each batch has at most 5 frames (batch_size).
  # There are 2 * 11 = 22 frames in total; in the non-recurrent case the two seqs
  # are concatenated, so we get ceil(22 / 5) = 5 batches.
  assert_equal(len(all_batches), 5)

  assert_equal(all_batches[0].start_seq, 0)
  assert_equal(all_batches[0].end_seq, 1)  # exclusive
  assert_equal(len(all_batches[0].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[0].seqs[0].seq_idx, 0)
  assert_equal(all_batches[0].seqs[0].seq_start_frame, 0)
  assert_equal(all_batches[0].seqs[0].seq_end_frame, 5)
  assert_equal(all_batches[0].seqs[0].frame_length, 5)
  assert_equal(all_batches[0].seqs[0].batch_slice, 0)
  assert_equal(all_batches[0].seqs[0].batch_frame_offset, 0)

  assert_equal(all_batches[1].start_seq, 0)
  assert_equal(all_batches[1].end_seq, 1)  # exclusive
  assert_equal(len(all_batches[1].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[1].seqs[0].seq_idx, 0)
  assert_equal(all_batches[1].seqs[0].seq_start_frame, 5)
  assert_equal(all_batches[1].seqs[0].seq_end_frame, 10)
  assert_equal(all_batches[1].seqs[0].frame_length, 5)
  assert_equal(all_batches[1].seqs[0].batch_slice, 0)
  assert_equal(all_batches[1].seqs[0].batch_frame_offset, 0)

  assert_equal(all_batches[2].start_seq, 0)
  assert_equal(all_batches[2].end_seq, 2)  # exclusive. now both seq 0 and 1
  assert_equal(len(all_batches[2].seqs), 2)  # two copies, BatchSeqCopyPart
  assert_equal(all_batches[2].seqs[0].seq_idx, 0)
  assert_equal(all_batches[2].seqs[0].seq_start_frame, 10)
  assert_equal(all_batches[2].seqs[0].seq_end_frame, 11)
  assert_equal(all_batches[2].seqs[0].frame_length, 1)
  assert_equal(all_batches[2].seqs[0].batch_slice, 0)
  assert_equal(all_batches[2].seqs[0].batch_frame_offset, 0)

  assert_equal(all_batches[3].start_seq, 1)
  assert_equal(all_batches[3].end_seq, 2)  # exclusive
  assert_equal(len(all_batches[3].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[3].seqs[0].seq_idx, 1)
  assert_equal(all_batches[3].seqs[0].seq_start_frame, 0)
  assert_equal(all_batches[3].seqs[0].seq_end_frame, 5)
  assert_equal(all_batches[3].seqs[0].frame_length, 5)
  assert_equal(all_batches[3].seqs[0].batch_slice, 0)
  assert_equal(all_batches[3].seqs[0].batch_frame_offset, 0)
Example #7
def test_hdf_create():
    hdf_filename = tempfile.mktemp(suffix=".hdf", prefix="nose-dataset-create")
    hdf_dataset = hdf_dataset_init(hdf_filename)
    assert os.path.exists(hdf_filename)

    dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
    dataset.init_seq_order(epoch=1)

    hdf_dump_from_dataset(dataset, hdf_dataset, DictAsObj(options))
    hdf_close(hdf_dataset)

    os.remove(hdf_filename)
Example #8
def test_iterate_seqs_chunking_1():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
  dataset.init_seq_order(1)
  seqs = list(dataset._iterate_seqs(chunk_size=10, chunk_step=5))
  for s in seqs:
    print(s)
  assert_equal(len(seqs), 6)
  assert_equal(seqs[0], (0, 0, 10))  # seq-idx, start-frame, end-frame
  assert_equal(seqs[1], (0, 5, 11))
  assert_equal(seqs[2], (0, 10, 11))
  assert_equal(seqs[3], (1, 0, 10))
  assert_equal(seqs[4], (1, 5, 11))
  assert_equal(seqs[5], (1, 10, 11))
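
The chunk triples follow a simple pattern: a window of chunk_size frames advances by chunk_step, with the end clipped to the sequence length. A simplified model of that loop (the real _iterate_seqs handles more variants):

seq_len, chunk_size, chunk_step = 11, 10, 5
chunks = []
start = 0
while start < seq_len:
    chunks.append((start, min(start + chunk_size, seq_len)))
    start += chunk_step
assert chunks == [(0, 10), (5, 11), (10, 11)]  # the per-seq pattern asserted above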
Example #9
def test_hdf_create_unicode_labels():
  hdf_filename = tempfile.mktemp(suffix=".hdf", prefix="nose-dataset-create")
  hdf_dataset = hdf_dataset_init(hdf_filename)
  assert os.path.exists(hdf_filename)

  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
  assert "classes" in dataset.get_target_list()
  dataset.labels["classes"] = ['’', 'ä', 'x']  # have some Unicode chars here
  dataset.init_seq_order(epoch=1)

  hdf_dump_from_dataset(dataset, hdf_dataset, DictAsObj(options))
  hdf_close(hdf_dataset)

  os.remove(hdf_filename)
Example #10
def test_engine_rec_subnet_count():
    from GeneratingDataset import DummyDataset
    seq_len = 5
    # The dataset is actually not used.
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    config = Config()
    config.update({
        "model": "/tmp/model",
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "output": {
                "class": "rec",
                "from":
                ["data"
                 ],  # actually not used, except that it defines the length
                "unit": {
                    "output": {
                        "class": "activation",
                        "activation": "identity + 1",
                        "from": ["prev:output"],
                        "initial_output":
                        0,  # note: initial output is for t == -1
                        "out_type": {
                            "dim": 1,
                            "dtype": "int32"
                        }
                    }
                }
            }
        }
    })
    engine = Engine(config=config)
    engine.init_train_from_config(config=config,
                                  train_data=dataset,
                                  dev_data=None,
                                  eval_data=None)

    out = engine.forward_single(dataset=dataset, seq_idx=0)
    assert_equal(out.shape, (seq_len, 1))
    assert_equal(out.dtype, numpy.int32)
    assert_equal(list(out[:, 0]), list(range(1, seq_len + 1)))
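
Why the output counts 1..seq_len: the rec unit applies "identity + 1" to prev:output at every frame, starting from "initial_output": 0 at t == -1. Spelled out:

out = 0                # "initial_output": 0, the value at t == -1
expected = []
for t in range(5):     # seq_len frames
    out = out + 1      # "identity + 1" applied to prev:output
    expected.append(out)
assert expected == [1, 2, 3, 4, 5]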
Example #11
def test_batches_recurrent_1():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
  dataset.init_seq_order(1)
  dataset.chunk_size = 10
  dataset.chunk_step = 5
  batch_gen = dataset.generate_batches(recurrent_net=True, max_seqs=1, batch_size=20)
  all_batches = []  # type: list[Batch]
  while batch_gen.has_more():
    batch, = batch_gen.peek_next_n(1)
    assert_is_instance(batch, Batch)
    print("batch:", batch)
    print("batch seqs:", batch.seqs)
    all_batches.append(batch)
    batch_gen.advance(1)

  # Each batch will have 1 batch-slice (max_seqs) and up to 10 frames (chunk_size).
  # For each seq, we get 3 chunks (chunk_step 5 for 11 frames).
  # Thus, 6 batches.
  assert_equal(len(all_batches), 6)

  assert_equal(all_batches[0].start_seq, 0)
  assert_equal(all_batches[0].end_seq, 1)  # exclusive
  assert_equal(len(all_batches[0].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[0].seqs[0].seq_idx, 0)
  assert_equal(all_batches[0].seqs[0].seq_start_frame, 0)
  assert_equal(all_batches[0].seqs[0].seq_end_frame, 10)
  assert_equal(all_batches[0].seqs[0].frame_length, 10)
  assert_equal(all_batches[0].seqs[0].batch_slice, 0)
  assert_equal(all_batches[0].seqs[0].batch_frame_offset, 0)

  assert_equal(all_batches[1].start_seq, 0)
  assert_equal(all_batches[1].end_seq, 1)  # exclusive
  assert_equal(len(all_batches[1].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[1].seqs[0].seq_idx, 0)
  assert_equal(all_batches[1].seqs[0].seq_start_frame, 5)
  assert_equal(all_batches[1].seqs[0].seq_end_frame, 11)
  assert_equal(all_batches[1].seqs[0].frame_length, 6)
  assert_equal(all_batches[1].seqs[0].batch_slice, 0)
  assert_equal(all_batches[1].seqs[0].batch_frame_offset, 0)

  assert_equal(all_batches[2].start_seq, 0)
  assert_equal(all_batches[2].end_seq, 1)  # exclusive
  assert_equal(len(all_batches[2].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[2].seqs[0].seq_idx, 0)
  assert_equal(all_batches[2].seqs[0].seq_start_frame, 10)
  assert_equal(all_batches[2].seqs[0].seq_end_frame, 11)
  assert_equal(all_batches[2].seqs[0].frame_length, 1)
  assert_equal(all_batches[2].seqs[0].batch_slice, 0)
  assert_equal(all_batches[2].seqs[0].batch_frame_offset, 0)

  assert_equal(all_batches[3].start_seq, 1)
  assert_equal(all_batches[3].end_seq, 2)  # exclusive
  assert_equal(len(all_batches[3].seqs), 1)  # 1 BatchSeqCopyPart
  assert_equal(all_batches[3].seqs[0].seq_idx, 1)
  assert_equal(all_batches[3].seqs[0].seq_start_frame, 0)
  assert_equal(all_batches[3].seqs[0].seq_end_frame, 10)
  assert_equal(all_batches[3].seqs[0].frame_length, 10)
  assert_equal(all_batches[3].seqs[0].batch_slice, 0)
  assert_equal(all_batches[3].seqs[0].batch_frame_offset, 0)
Example #12
def test_engine_train():
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    train_data = DummyDataset(input_dim=n_data_dim,
                              output_dim=n_classes_dim,
                              num_seqs=4,
                              seq_len=seq_len)
    train_data.init_seq_order(epoch=1)
    cv_data = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    cv_data.init_seq_order(epoch=1)

    config = Config()
    config.update({
        "model": "/tmp/model",
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "output": {
                "class": "softmax",
                "loss": "ce"
            }
        },
        "start_epoch": 1,
        "num_epochs": 2
    })
    engine = Engine(config=config)
    engine.init_train_from_config(config=config,
                                  train_data=train_data,
                                  dev_data=cv_data,
                                  eval_data=None)
    engine.train()
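
With "start_epoch": 1 and "num_epochs": 2 in the config, engine.train() should run epochs 1 and 2 over train_data, using cv_data (passed as dev_data) for cross-validation scoring after each epoch.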
Example #13
def test_engine_search_attention():
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)
    print("Hello search!")

    config = Config()
    config.update({
        "model": "/tmp/model",
        "batch_size": 5000,
        "max_seqs": 2,
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "encoder": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 5
            },
            "output": {
                "class": "rec",
                "from": [],
                "unit": {
                    'output': {
                        'class': 'choice',
                        'target': 'classes',
                        'beam_size': 4,
                        'from': ["output_prob"]
                    },
                    "end": {
                        "class": "compare",
                        "from": ["output"],
                        "value": 0
                    },
                    'orth_embed': {
                        'class': 'linear',
                        'activation': None,
                        'from': ['output'],
                        "n_out": 7
                    },
                    "s": {
                        "class": "rnn_cell",
                        "unit": "LSTMBlock",
                        "from": ["prev:c", "prev:orth_embed"],
                        "n_out": 7
                    },
                    "c_in": {
                        "class": "linear",
                        "activation": "tanh",
                        "from": ["s", "prev:orth_embed"],
                        "n_out": 5
                    },
                    "c": {
                        "class": "dot_attention",
                        "from": ["c_in"],
                        "base": "base:encoder",
                        "base_ctx": "base:encoder"
                    },
                    "output_prob": {
                        "class": "softmax",
                        "from": ["prev:s", "c"],
                        "target": "classes",
                        "loss": "ce"
                    }
                },
                "target": "classes",
                "max_seq_len": 10
            },
            "decision": {
                "class": "decide",
                "from": ["output"],
                "loss": "edit_distance"
            }
        }
    })
    engine = Engine(config=config)
    print("Init network...")
    engine.start_epoch = 1
    engine.use_dynamic_train_flag = False
    engine.use_search_flag = True
    engine.init_network_from_config(config)
    print("network:")
    pprint(engine.network.layers)
    assert "output" in engine.network.layers
    assert "decision" in engine.network.layers

    print("Search...")
    engine.search(dataset=dataset)
    print("error keys:")
    pprint(engine.network.error_by_layer)
    assert engine.network.total_objective is not None
    assert "decision" in engine.network.error_by_layer

    engine.finalize()
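
Note the flags set before init_network_from_config: use_search_flag = True makes the 'choice' layer return beam-search hypotheses (beam_size 4) instead of feeding back ground-truth targets, and use_dynamic_train_flag = False presumably fixes the network in eval mode, which is what engine.search() needs here.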
Example #14
def test_DataProvider():
    """
  :param Dataset.Dataset dataset:
  :param int seq_idx:
  :param str|None output_layer_name: e.g. "output". if not set, will read from config "forward_output_layer"
  :return: numpy array, output in time major format (time,batch,dim)
  :rtype: numpy.ndarray
  """
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    extern_data = ExternData()
    extern_data.init_from_dataset(dataset)

    # No Runner instance here but a very simplified version of Runner.run().
    # First we need a custom DataProvider with a custom BatchSetGenerator
    # which will yield only one single batch for the provided sequence idx.
    seq_idx = 0
    n_batch = 1
    batch = Batch()
    batch.add_frames(seq_idx=seq_idx,
                     seq_start_frame=0,
                     length=dataset.get_seq_length(seq_idx))
    batch_generator = iter([batch])
    batches = BatchSetGenerator(dataset, generator=batch_generator)
    from TFDataPipeline import FeedDictDataProvider
    data_provider = FeedDictDataProvider(tf_session=session,
                                         extern_data=extern_data,
                                         data_keys=["data", "classes"],
                                         dataset=dataset,
                                         batches=batches)

    feed_dict = data_provider.get_feed_dict(single_threaded=True)
    print(feed_dict)
    assert_is_instance(feed_dict, dict)
    assert extern_data.data["data"].placeholder in feed_dict
    assert extern_data.data["data"].size_placeholder[0] in feed_dict
    assert extern_data.data["classes"].placeholder in feed_dict
    assert extern_data.data["classes"].size_placeholder[0] in feed_dict
    data = feed_dict[extern_data.data["data"].placeholder]
    data_size = feed_dict[extern_data.data["data"].size_placeholder[0]]
    classes = feed_dict[extern_data.data["classes"].placeholder]
    classes_size = feed_dict[extern_data.data["classes"].size_placeholder[0]]
    assert_is_instance(data, numpy.ndarray)
    assert_is_instance(data_size, numpy.ndarray)
    assert_is_instance(classes, numpy.ndarray)
    assert_is_instance(classes_size, numpy.ndarray)
    assert_equal(data.shape, (n_batch, seq_len, n_data_dim))
    assert_equal(data_size.shape, (n_batch, ))
    assert_equal(classes.shape, (n_batch, seq_len))
    assert_equal(classes_size.shape, (n_batch, ))
    assert_equal(list(data_size), [seq_len])
    assert_equal(list(classes_size), [seq_len])
    numpy.testing.assert_almost_equal(list(data[0, 0]), [-0.5, -0.4])
    numpy.testing.assert_almost_equal(list(data[0, -1]), [0.3, 0.4])
    assert_equal(classes.tolist(), [[1, 2, 0, 1, 2]])
Example #15
def test_engine_search():
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
    n_classes_dim = 3
    dataset = DummyDataset(input_dim=n_data_dim,
                           output_dim=n_classes_dim,
                           num_seqs=2,
                           seq_len=seq_len)
    dataset.init_seq_order(epoch=1)

    config = Config()
    config.update({
        "model": "/tmp/model",
        "batch_size": 5000,
        "num_outputs": n_classes_dim,
        "num_inputs": n_data_dim,
        "network": {
            "output": {
                "class": "rec",
                "from": [],
                "max_seq_len": 10,
                "target": "classes",
                "unit": {
                    "prob": {
                        "class": "softmax",
                        "from": ["prev:output"],
                        "loss": "ce",
                        "target": "classes"
                    },
                    "output": {
                        "class": "choice",
                        "beam_size": 4,
                        "from": ["prob"],
                        "target": "classes",
                        "initial_output": 0
                    },
                    "end": {
                        "class": "compare",
                        "from": ["output"],
                        "value": 0
                    }
                }
            },
            "decision": {
                "class": "decide",
                "from": ["output"],
                "loss": "edit_distance"
            }
        }
    })
    engine = Engine(config=config)
    # Normally init_network can be used. We only do init_train here to randomly initialize the network.
    engine.init_train_from_config(config=config,
                                  train_data=dataset,
                                  dev_data=None,
                                  eval_data=None)
    print("network:")
    pprint(engine.network.layers)
    assert "output" in engine.network.layers
    assert "decision" in engine.network.layers

    engine.search(dataset=dataset)
    print("error keys:")
    pprint(engine.network.error_by_layer)
    assert engine.network.total_objective is not None
    assert "decision" in engine.network.error_by_layer

    engine.finalize()
Example #16
def test_combi_auto_enc_longer():
  config = Config()
  config.update({
    "multiprocessing": False,
    "blocking": True,
    "device": "cpu",
    "num_epochs": 1,
    "num_inputs": 3,
    "num_outputs": {"classes": 2},
    "learning_rate": 1.0,
    "adadelta": True,
    "network": {
      "output": {"class": "softmax", "loss": "ce", "target": "classes"},
      "auto-enc": {"class": "softmax", "loss": "sse", "dtype": "float32", "target": "data"}
    }
  })

  device = Device("cpu", config=config, blocking=True)

  # Set net params.
  def get_net_params(with_auto_enc=True):
    d = {
      "output": {"W_in_data_output": numpy.arange(0.1, 0.7, 0.1, dtype="float32").reshape((3, 2)),
                 "b_output": numpy.arange(0.0, 2, dtype="float32")}
    }
    if with_auto_enc:
      d["auto-enc"] = {"W_in_data_auto-enc": numpy.arange(0.1, 1.0, 0.1, dtype="float32").reshape((3, 3)),
                       "b_auto-enc": numpy.arange(0.0, 3, dtype="float32")}
    return d
  device.trainnet.set_params_by_dict(get_net_params())
  device.testnet.set_params_by_dict(get_net_params())

  # Show params.
  for p in device.trainnet.get_all_params_vars():
    print "init %s:" % p
    pprint(p.get_value())

  # Init dataset.
  dataset = DummyDataset(input_dim=config.typed_value("num_inputs"),
                         output_dim=config.typed_value("num_outputs"),
                         num_seqs=10)
  dataset.init_seq_order()

  cost_output_sum = 0.0
  for seq_idx in range(dataset.num_seqs):
    # Copy to device allocation.
    success = assign_dev_data_single_seq(device, dataset, seq_idx)
    assert_true(success, "failed to allocate & assign data")

    # One train step.
    device.set_learning_rate(config.typed_value("learning_rate"))
    device.run("train")
    output_list, outputs_format = device.result()
    assert_is_instance(output_list, list)
    assert_true(outputs_format, "for train, we should always get the format")
    outputs = Device.make_result_dict(output_list, outputs_format)
    print("seq %i" % seq_idx)
    pprint(outputs)
    assert_in("cost:output", outputs)
    assert_in("cost:auto-enc", outputs)
    cost_output_sum += outputs["cost:output"]

  # Now, drop the auto-enc from the network, and redo the same thing.
  del config.typed_value("network")["auto-enc"]
  device = Device("cpu", config=config, blocking=True)
  device.trainnet.set_params_by_dict(get_net_params(with_auto_enc=False))
  device.testnet.set_params_by_dict(get_net_params(with_auto_enc=False))
  for p in device.trainnet.get_all_params_vars():
    print "second run, init %s:" % p
    pprint(p.get_value())
  dataset.init_seq_order()  # reset

  cost2_output_sum = 0.0
  for seq_idx in range(dataset.num_seqs):
    # Copy to device allocation.
    success = assign_dev_data_single_seq(device, dataset, seq_idx)
    assert_true(success, "failed to allocate & assign data")

    # One train step.
    device.set_learning_rate(config.typed_value("learning_rate"))
    device.run("train")
    output_list, outputs_format = device.result()
    assert_is_instance(output_list, list)
    assert_true(outputs_format, "for train, we should always get the format")
    outputs = Device.make_result_dict(output_list, outputs_format)
    print("seq %i" % seq_idx)
    pprint(outputs)
    assert_in("cost:output", outputs)
    assert_not_in("cost:auto-enc", outputs)
    cost2_output_sum += outputs["cost:output"]

  assert_equal(cost_output_sum, cost2_output_sum)
  assert_almost_equal(cost_output_sum, 16.028842568397522, places=6)
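
The final equality holds because 'output' and 'auto-enc' have disjoint parameters: dropping the auto-enc branch removes its cost term, but should leave the gradients and adadelta updates of the output branch untouched, so both runs accumulate the identical cost:output sum.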
Example #17
def test_load_seqs():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
  dataset.init_seq_order(epoch=1)
  dataset.load_seqs(0, 1)
  dataset.load_seqs(1, 3)
Example #18
def test_init():
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=4)
  assert_equal(dataset.num_inputs, 2)
  assert_equal(dataset.num_outputs, {"classes": [3, 1], "data": [2, 2]})
  assert_equal(dataset.num_seqs, 4)
Example #19
def test_batches_context_window():
    context_window = 2
    ctx_lr = context_window - 1
    ctx_left = ctx_lr // 2
    ctx_right = ctx_lr - ctx_left

    dataset = DummyDataset(input_dim=2,
                           output_dim=3,
                           num_seqs=1,
                           seq_len=11,
                           context_window=context_window)
    dataset.init_seq_order(1)
    dataset.chunk_size = 5
    dataset.chunk_step = 5
    batch_gen = dataset.generate_batches(recurrent_net=True,
                                         max_seqs=1,
                                         batch_size=20)
    all_batches = []  # type: list[Batch]
    while batch_gen.has_more():
        batch, = batch_gen.peek_next_n(1)
        assert_is_instance(batch, Batch)
        print("batch:", batch)
        print("batch seqs:", batch.seqs)
        all_batches.append(batch)
        batch_gen.advance(1)

    # Each batch will have 1 batch-slice (max_seqs) and up to 5 frames (chunk_size).
    # For each seq, we get 3 chunks (chunk_step 5 for 11 frames).
    # Thus, 3 batches.
    assert_equal(len(all_batches), 3)
    b0, b1, b2 = all_batches
    assert isinstance(b0, Batch)
    assert isinstance(b1, Batch)
    assert isinstance(b2, Batch)

    assert_equal(b0.start_seq, 0)
    assert_equal(b0.end_seq, 1)  # exclusive
    assert_equal(len(b0.seqs), 1)  # 1 BatchSeqCopyPart
    assert_equal(b0.seqs[0].seq_idx, 0)
    assert_equal(b0.seqs[0].seq_start_frame["classes"], 0)
    assert_equal(b0.seqs[0].seq_end_frame["classes"], 5)
    assert_equal(b0.seqs[0].frame_length["classes"], 5)
    assert_equal(b0.seqs[0].seq_start_frame["data"], 0 - ctx_left)
    assert_equal(b0.seqs[0].seq_end_frame["data"], 5 + ctx_right)
    assert_equal(b0.seqs[0].frame_length["data"], 5 + ctx_lr)
    assert_equal(b0.seqs[0].batch_slice, 0)
    assert_equal(b0.seqs[0].batch_frame_offset, 0)

    assert_equal(b1.start_seq, 0)
    assert_equal(b1.end_seq, 1)  # exclusive
    assert_equal(len(b1.seqs), 1)  # 1 BatchSeqCopyPart
    assert_equal(b1.seqs[0].seq_idx, 0)
    assert_equal(b1.seqs[0].seq_start_frame["classes"], 5)
    assert_equal(b1.seqs[0].seq_end_frame["classes"], 10)
    assert_equal(b1.seqs[0].frame_length["classes"], 5)
    assert_equal(b1.seqs[0].seq_start_frame["data"], 5 - ctx_left)
    assert_equal(b1.seqs[0].seq_end_frame["data"], 10 + ctx_right)
    assert_equal(b1.seqs[0].frame_length["data"], 5 + ctx_lr)
    assert_equal(b1.seqs[0].batch_slice, 0)
    assert_equal(b1.seqs[0].batch_frame_offset, 0)

    assert_equal(b2.start_seq, 0)
    assert_equal(b2.end_seq, 1)  # exclusive
    assert_equal(len(b2.seqs), 1)  # 1 BatchSeqCopyPart
    assert_equal(b2.seqs[0].seq_idx, 0)
    assert_equal(b2.seqs[0].seq_start_frame["classes"], 10)
    assert_equal(b2.seqs[0].seq_end_frame["classes"], 11)
    assert_equal(b2.seqs[0].frame_length["classes"], 1)
    assert_equal(b2.seqs[0].seq_start_frame["data"], 10 - ctx_left)
    assert_equal(b2.seqs[0].seq_end_frame["data"], 11 + ctx_right)
    assert_equal(b2.seqs[0].frame_length["data"], 1 + ctx_lr)
    assert_equal(b2.seqs[0].batch_slice, 0)
    assert_equal(b2.seqs[0].batch_frame_offset, 0)
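
The context-window bookkeeping is easy to check by hand; with context_window = 2, the single extra input frame ends up on the right:

context_window = 2
ctx_lr = context_window - 1    # 1 extra frame in total
ctx_left = ctx_lr // 2         # 0 extra frames on the left
ctx_right = ctx_lr - ctx_left  # 1 extra frame on the right
# A "classes" chunk [5, 10) maps to a "data" window [5 - 0, 10 + 1) = [5, 11),
# i.e. frame_length 5 for the targets vs. 5 + ctx_lr = 6 for the inputs.
assert (5 - ctx_left, 10 + ctx_right) == (5, 11)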