Code example #1
def test_assign_dev_data():
  config = Config()
  config.update(dummyconfig_dict)
  device = DummyDevice(config=config)
  dataset = DummyDataset(input_dim=config.int("num_inputs", 0),
                         output_dim=config.int("num_outputs", 0),
                         num_seqs=10)
  batches = [generate_batch(0, dataset), generate_batch(1, dataset)]
  success, num_batches = assign_dev_data(device, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
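The generate_batch helper used above is not part of the excerpt. A minimal sketch of what it plausibly looks like, assuming the Batch class from returnn.engine.batch (the import used in example #3) and the dataset.get_seq_length API seen there:

from returnn.engine.batch import Batch

def generate_batch(seq_idx, dataset):
  # Wrap a single dataset sequence into one Batch, starting at frame 0.
  batch = Batch()
  batch.add_sequence_as_slice(seq_idx=seq_idx, seq_start_frame=0,
                              length=dataset.get_seq_length(seq_idx))
  return batch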
Code example #2
def test_DummyDevice():
    dataset = Task12AXDataset(num_seqs=1000,
                              seq_ordering="random",
                              chunking="200:200")
    dataset.init_seq_order(epoch=1)
    batch_gen = dataset.generate_batches(recurrent_net=True,
                                         batch_size=1000,
                                         max_seqs=3)
    batches = batch_gen.peek_next_n(1)
    dev = DummyDevice()
    assign_success, _ = engine_util.assign_dev_data(device=dev,
                                                    dataset=dataset,
                                                    batches=batches)
    assert assign_success
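After a successful assignment, a caller would typically mark the peeked batches as consumed and move on. A hedged sketch, assuming the BatchSetGenerator API (peek_next_n/advance/has_more) behind generate_batches:

    # Consume the peeked batches, then peek the next ones (if any).
    batch_gen.advance(len(batches))
    if batch_gen.has_more():
        batches = batch_gen.peek_next_n(1)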
Code example #3
File: engine.py  Project: vieting/returnn
  def forward_fill_queue(self):
    """
    Full sequence forwarding, no chunking (at the moment).
    """
    assert self.train_started
    if self.is_forwarding_finished: return

    # We will ignore max_seq_length.
    batch_size = self.config.int('batch_size', 1)
    max_seqs = self.config.int('max_seqs', -1)
    if max_seqs <= 0: max_seqs = float('inf')
    dataset = self.engine.train_data
    from returnn.engine.batch import Batch

    # Collect all batches.
    forward_batches = []; ":type: list[EngineBatch.Batch]"
    num_seqs = 0
    while self._device_exec("have_space_in_forward_data_queue", num_seqs=num_seqs):
      # Load next sequence for forwarding, keep all which are still needed for training.
      if not dataset.is_less_than_num_seqs(self.forward_current_seq):
        self.is_forwarding_finished = True
        break
      dataset.load_seqs(self.train_start_seq, self.forward_current_seq + 1)
      seq_len = dataset.get_seq_length(self.forward_current_seq)

      if not forward_batches:
        forward_batches.append(Batch())
      batch = forward_batches[-1]
      dt, ds = batch.try_sequence_as_slice(seq_len)
      if ds > 1 and ((dt * ds).max_value() > batch_size or ds > max_seqs):
        batch = Batch()
        forward_batches.append(batch)
      batch.add_sequence_as_slice(seq_idx=self.forward_current_seq, seq_start_frame=0, length=seq_len)
      num_seqs += 1
      self.forward_current_seq += 1

    # Forward the batches.
    from returnn.theano.engine_util import assign_dev_data
    for batch in forward_batches:
      print("SeqTrainParallelControl, forward %r" % batch, file=log.v4)
      success = assign_dev_data(self.train_device, dataset, [batch], load_seqs=False)
      assert success, "failed to allocate & assign data"
      self.train_device.update_data()
      self._device_exec("do_forward", batch=batch)
      self._device_exec("train_check_calc_loss")
Code example #4
File: test_SprintDataset.py  Project: vieting/returnn
def test_assign_dev_data():
    config = Config()
    config.update(dummyconfig_dict)
    print("Create ExternSprintDataset")
    dataset = ExternSprintDataset(
        [sys.executable, sprintExecPath],
        "--*.feature-dimension=2 --*.trainer-output-dimension=3 "
        "--*.crnn-dataset=DummyDataset(2,3,num_seqs=4,seq_len=10)")
    dataset.init_seq_order(epoch=1)
    assert_true(dataset.is_less_than_num_seqs(0))
    recurrent = False
    batch_generator = dataset.generate_batches(recurrent_net=recurrent,
                                               batch_size=5)
    batches = batch_generator.peek_next_n(2)
    assert_equal(len(batches), 2)
    if theano:
        print("Create Device")
        device = DummyDevice(config=config)
        success, num_batches = assign_dev_data(device, dataset, batches)
        assert_true(success)
        assert_equal(num_batches, len(batches))
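The "if theano:" guard implies an optional import near the top of the test file; a sketch of the usual pattern (an assumption, not shown in the excerpt):

try:
    import theano
except ImportError:
    theano = None  # Theano-dependent parts of the test are then skipped.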
Code example #5
def load(lstm_opts=None):
    if not lstm_opts: lstm_opts = {"class": "lstm2"}
    lstm_opts = lstm_opts.copy()
    lstm_opts.update({"n_out": 10, "from": "in"})
    num_inputs = 9
    num_outputs = 2
    net_topo = {
        "in": {
            "class": "dump",
            "filename": "in"
        },
        "lstm": lstm_opts,
        "lstm_dump": {
            "class": "dump",
            "from": "lstm",
            "filename": "lstm"
        },
        "output": {
            "class": "softmax",
            "loss": "ce",
            "from": "lstm_dump"
        }
    }

    collected_data = {}
    DumpLayer.global_debug_container = collected_data

    net = network.LayerNetwork.from_json(json_content=net_topo,
                                         n_in=num_inputs,
                                         n_out={"classes": (num_outputs, 1)},
                                         train_flag=True)
    net.declare_train_params()

    # Init dataset and prepare one minibatch.
    epoch = 1
    dataset = Task12AXDataset(num_seqs=1000,
                              seq_ordering="random",
                              chunking="200:200")
    dataset.init_seq_order(epoch=epoch)
    batch_gen = dataset.generate_batches(recurrent_net=net.recurrent,
                                         batch_size=5000,
                                         max_seqs=10)
    batches = batch_gen.peek_next_n(1)
    # We need the DummyDevice for assign_dev_data.
    dev = DummyDevice()
    assign_success, _ = engine_util.assign_dev_data(device=dev,
                                                    dataset=dataset,
                                                    batches=batches)
    assert assign_success
    dev.initialize(net)
    dev.update_data()
    givens = [(net.y[k], dev.y[k]) for k in dev.used_data_keys]
    givens += [(net.j[k], dev.j[k]) for k in dev.used_data_keys]

    # Now gradients, updates and compile everything.
    gradients = {
        p: T.grad(net.get_objective(), p, known_grads=net.known_grads)
        for p in net.train_params_vars
    }
    updater = Updater(adam=True)
    updater.initVars(net, gradients)
    updater.setLearningRate(learning_rate=0.01)
    trainer = theano.function(inputs=[],
                              outputs=[net.total_cost],
                              givens=givens,
                              updates=updater.getUpdateList(),
                              on_unused_input='warn',
                              name="train_and_updater")

    for p in net.train_params_vars:
        collected_data["param:%s" % p.name] = p.get_value()

    # And finally, run it.
    cost = trainer()
    collected_data["cost"] = cost
    return collected_data
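A hypothetical way to use load(): run it for two LSTM layer classes and compare the dumped activations. This assumes DumpLayer stores the dumped values in global_debug_container under its filename key ("in", "lstm"), which the excerpt does not show:

import numpy
ref = load({"class": "lstm2"})
other = load({"class": "lstm"})  # assumed alternative LSTM layer class
# Compare the dumped LSTM activations of both runs (key is an assumption).
numpy.testing.assert_allclose(ref["lstm"], other["lstm"], rtol=1e-5)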
Code example #6
def assign_dev_data(self, device, batches):
    return assign_dev_data(device, self.data, batches)
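A hedged usage sketch for this wrapper, assuming provider is an object defining the method above with provider.data set to the dataset:

# The wrapper forwards to the module-level assign_dev_data, so it returns
# the same (success, num_batches) tuple as in the examples above.
success, num_batches = provider.assign_dev_data(device, batches)
assert success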