Example #1
# Imports assumed for the old Theano-based RETURNN (CRNN) code base;
# the exact module paths may differ between versions.
import theano
import theano.tensor as T
import Network
import EngineUtil
from NetworkHiddenLayer import DumpLayer
from GeneratingDataset import Task12AXDataset
from Updater import Updater
from Device import DummyDevice  # assumption: may instead be a test-local helper


def load(lstm_opts=None):
    if not lstm_opts: lstm_opts = {"class": "lstm2"}
    lstm_opts = lstm_opts.copy()
    lstm_opts.update({"n_out": 10, "from": "in"})
    num_inputs = 9
    num_outputs = 2
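    # Topology: "dump" layers record the data flowing through them, so both
    # the network input and the LSTM output end up in the debug container.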
    net_topo = {
        "in": {
            "class": "dump",
            "filename": "in"
        },
        "lstm": lstm_opts,
        "lstm_dump": {
            "class": "dump",
            "from": "lstm",
            "filename": "lstm"
        },
        "output": {
            "class": "softmax",
            "loss": "ce",
            "from": "lstm_dump"
        }
    }

    # Let the dump layers write into a dict instead of to files:
    # DumpLayer stores its data in global_debug_container when that is set.
    collected_data = {}
    DumpLayer.global_debug_container = collected_data

    net = Network.LayerNetwork.from_json(json_content=net_topo,
                                         n_in=num_inputs,
                                         # (dim, ndim): ndim 1 marks sparse class indices
                                         n_out={"classes": (num_outputs, 1)},
                                         train_flag=True)
    net.declare_train_params()

    # Init dataset and prepare one minibatch.
    epoch = 1
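    # Task12AXDataset generates the synthetic 12AX sequence task; the chunking
    # spec "200:200" cuts sequences into chunks of 200 frames with step 200.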
    dataset = Task12AXDataset(num_seqs=1000,
                              seq_ordering="random",
                              chunking="200:200")
    dataset.init_seq_order(epoch=epoch)
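    # Build batches of at most 5000 frames and at most 10 sequences each.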
    batch_gen = dataset.generate_batches(recurrent_net=net.recurrent,
                                         batch_size=5000,
                                         max_seqs=10)
    batches = batch_gen.peek_next_n(1)  # take one batch without consuming the generator
    # We need the DummyDevice for assign_dev_data.
    dev = DummyDevice()
    assign_success, _ = EngineUtil.assign_dev_data(device=dev,
                                                   dataset=dataset,
                                                   batches=batches)
    assert assign_success
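    # initialize() sets up the device-side copy of the network; update_data()
    # transfers the assigned batch into the device's shared variables.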
    dev.initialize(net)
    dev.update_data()
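    # Map the network's symbolic inputs onto the device's shared variables:
    # y holds the data per key, j the corresponding per-frame index masks.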
    givens = [(net.y[k], dev.y[k]) for k in dev.used_data_keys]
    givens += [(net.j[k], dev.j[k]) for k in dev.used_data_keys]

    # Now compute gradients, set up the updates, and compile everything.
    gradients = {
        p: T.grad(net.get_objective(), p, known_grads=net.known_grads)
        for p in net.train_params_vars
    }
    updater = Updater(adam=True)
    updater.initVars(net, gradients)
    updater.setLearningRate(learning_rate=0.01)
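    # Compile a single Theano function that performs one full training step:
    # forward pass, cost, gradients, and Adam parameter updates.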
    trainer = theano.function(inputs=[],
                              outputs=[net.total_cost],
                              givens=givens,
                              updates=updater.getUpdateList(),
                              on_unused_input='warn',
                              name="train_and_updater")

    # Snapshot the initial parameter values before the update step.
    for p in net.train_params_vars:
        collected_data["param:%s" % p.name] = p.get_value()

    # And finally, run one training step.
    cost = trainer()  # a list with a single element: the total cost
    collected_data["cost"] = cost
    return collected_data
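
A minimal usage sketch (not part of the original code): call load() twice with
different LSTM layer classes and compare the results. The "lstm" class name is
an assumption for an alternative implementation; whether the numbers match
depends on both variants being initialized identically.

import numpy

data_a = load({"class": "lstm2"})  # the default variant used above
data_b = load({"class": "lstm"})   # assumed alternative LSTM layer class
numpy.testing.assert_almost_equal(numpy.asarray(data_a["cost"]),
                                  numpy.asarray(data_b["cost"]),
                                  decimal=5)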