Example #1
def test_assign_dev_data():
    config = Config()
    config.update(dummyconfig_dict)
    device = DummyDevice(config=config)
    dataset = DummyDataset(input_dim=config.int("num_inputs", 0),
                           output_dim=config.int("num_outputs", 0),
                           num_seqs=10)
    batches = [generate_batch(0, dataset), generate_batch(1, dataset)]
    success, num_batches = assign_dev_data(device, dataset, batches)
    assert_true(success)
    assert_equal(num_batches, len(batches))
Example #2
def test_assign_dev_data():
  config = Config()
  config.update(dummyconfig_dict)
  device = DummyDevice(config=config)
  dataset = DummyDataset(input_dim=config.int("num_inputs", 0),
                         output_dim=config.int("num_outputs", 0),
                         num_seqs=10)
  batches = [generate_batch(0, dataset), generate_batch(1, dataset)]
  success, num_batches = assign_dev_data(device, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
Example #3
    def forward_fill_queue(self):
        """
    Full sequence forwarding, no chunking (at the moment).
    """
        assert self.train_started
        if self.is_forwarding_finished: return

        # We will ignore max_seq_length.
        batch_size = self.config.int('batch_size', 1)
        max_seqs = self.config.int('max_seqs', -1)
        if max_seqs <= 0: max_seqs = float('inf')
        dataset = self.engine.train_data
        from EngineBatch import Batch

        # Collect all batches.
        forward_batches = []
        ":type: list[EngineBatch.Batch]"
        num_seqs = 0
        while self._device_exec("have_space_in_forward_data_queue",
                                num_seqs=num_seqs):
            # Load next sequence for forwarding, keep all which are still needed for training.
            if not dataset.is_less_than_num_seqs(self.forward_current_seq):
                self.is_forwarding_finished = True
                break
            dataset.load_seqs(self.train_start_seq,
                              self.forward_current_seq + 1)
            seq_len = dataset.get_seq_length(self.forward_current_seq)

            if not forward_batches:
                forward_batches.append(Batch())
            batch = forward_batches[-1]
            dt, ds = batch.try_sequence_as_slice(seq_len)
            if ds > 1 and ((dt * ds).max_value() > batch_size
                           or ds > max_seqs):
                batch = Batch()
                forward_batches.append(batch)
            batch.add_sequence_as_slice(seq_idx=self.forward_current_seq,
                                        seq_start_frame=0,
                                        length=seq_len)
            num_seqs += 1
            self.forward_current_seq += 1

        # Forward the batches.
        from EngineUtil import assign_dev_data
        for batch in forward_batches:
            print >> log.v4, "SeqTrainParallelControl, forward %r" % batch
            success = assign_dev_data(self.train_device,
                                      dataset, [batch],
                                      load_seqs=False)
            assert success, "failed to allocate & assign data"
            self.train_device.update_data()
            self._device_exec("do_forward", batch=batch)
            self._device_exec("train_check_calc_loss")
Example #4
def test_assign_dev_data():
  config = Config()
  config.update(dummyconfig_dict)
  device = DummyDevice(config=config)
  dataset = ExternSprintDataset(sprintExecPath,
                                "--*.feature-dimension=2 --*.trainer-output-dimension=3 "
                                "--*.crnn-dataset=DummyDataset(2,3,4)")
  dataset.init_seq_order(epoch=1)
  assert_true(dataset.is_less_than_num_seqs(0))
  recurrent = False
  batch_generator = dataset.generate_batches(recurrent_net=recurrent, batch_size=512)
  batches = batch_generator.peek_next_n(2)
  assert_equal(len(batches), 2)
  success, num_batches = assign_dev_data(device, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
Example #5
  def forward_fill_queue(self):
    """
    Full sequence forwarding, no chunking (at the moment).
    """
    assert self.train_started
    if self.is_forwarding_finished: return

    # We will ignore max_seq_length.
    batch_size = self.config.int('batch_size', 1)
    max_seqs = self.config.int('max_seqs', -1)
    if max_seqs <= 0: max_seqs = float('inf')
    dataset = self.engine.train_data
    from EngineBatch import Batch

    # Collect all batches.
    forward_batches = []; ":type: list[EngineBatch.Batch]"
    num_seqs = 0
    while self._device_exec("have_space_in_forward_data_queue", num_seqs=num_seqs):
      # Load next sequence for forwarding, keep all which are still needed for training.
      if not dataset.is_less_than_num_seqs(self.forward_current_seq):
        self.is_forwarding_finished = True
        break
      dataset.load_seqs(self.train_start_seq, self.forward_current_seq + 1)
      seq_len = dataset.get_seq_length(self.forward_current_seq)

      if not forward_batches:
        forward_batches.append(Batch())
      batch = forward_batches[-1]
      dt, ds = batch.try_sequence_as_slice(seq_len)
      if ds > 1 and ((dt * ds).max_value() > batch_size or ds > max_seqs):
        batch = Batch()
        forward_batches.append(batch)
      batch.add_sequence_as_slice(seq_idx=self.forward_current_seq, seq_start_frame=0, length=seq_len)
      num_seqs += 1
      self.forward_current_seq += 1

    # Forward the batches.
    from EngineUtil import assign_dev_data
    for batch in forward_batches:
      print >> log.v4, "SeqTrainParallelControl, forward %r" % batch
      success = assign_dev_data(self.train_device, dataset, [batch], load_seqs=False)
      assert success, "failed to allocate & assign data"
      self.train_device.update_data()
      self._device_exec("do_forward", batch=batch)
      self._device_exec("train_check_calc_loss")
Example #6
def test_assign_dev_data():
  config = Config()
  config.update(dummyconfig_dict)
  print("Create ExternSprintDataset")
  dataset = ExternSprintDataset(
    [sys.executable, sprintExecPath],
    "--*.feature-dimension=2 --*.trainer-output-dimension=3 --*.crnn-dataset=DummyDataset(2,3,4)")
  dataset.init_seq_order(epoch=1)
  assert_true(dataset.is_less_than_num_seqs(0))
  recurrent = False
  batch_generator = dataset.generate_batches(recurrent_net=recurrent, batch_size=512)
  batches = batch_generator.peek_next_n(2)
  assert_equal(len(batches), 2)
  print("Create Device")
  device = DummyDevice(config=config)
  success, num_batches = assign_dev_data(device, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
Example #7
  def assign_dev_data(self, device, batches):
    return assign_dev_data(device, self.data, batches)
Example #8
  def assign_dev_data(self, device, batches):
    return assign_dev_data(device, self.data, batches)
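Across these examples, assign_dev_data is called as assign_dev_data(device, dataset, batches) and its result is treated as a (success, num_batches) pair; the forward_fill_queue examples additionally pass load_seqs=False because the sequences were already loaded via dataset.load_seqs(). The snippet below is a minimal sketch, not taken from any of the examples above: the helper name load_batches_onto_device is hypothetical, and it assumes the same EngineUtil.assign_dev_data signature shown in the tests.

# Minimal sketch (hypothetical helper), assuming the call pattern shown above:
# assign_dev_data(device, dataset, batches) -> (success, num_batches).
from EngineUtil import assign_dev_data

def load_batches_onto_device(device, dataset, batches):
  """Assign the given batches to the device and return how many were assigned."""
  success, num_batches = assign_dev_data(device, dataset, batches)
  assert success, "failed to allocate & assign data"
  return num_batches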