def test_assign_dev_data():
  """
  Build a DummyDataset from the dummy config, generate two batches from it,
  and check that assign_dev_data() successfully copies both onto a DummyDevice.
  """
  cfg = Config()
  cfg.update(dummyconfig_dict)
  dataset = DummyDataset(
    input_dim=cfg.int("num_inputs", 0),
    output_dim=cfg.int("num_outputs", 0),
    num_seqs=10)
  batches = [generate_batch(idx, dataset) for idx in (0, 1)]
  dev = DummyDevice(config=cfg)
  success, num_batches = assign_dev_data(dev, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
def forward_fill_queue(self):
  """
  Full sequence forwarding, no chunking (at the moment).
  Packs not-yet-forwarded training sequences into batches while the device
  reports free space in its forward data queue, then assigns each batch to
  the training device and triggers forwarding + loss calculation there.
  Sets self.is_forwarding_finished once the dataset is exhausted.
  """
  assert self.train_started
  if self.is_forwarding_finished:
    return

  # We will ignore max_seq_length.
  batch_size = self.config.int('batch_size', 1)
  max_seqs = self.config.int('max_seqs', -1)
  if max_seqs <= 0:
    max_seqs = float('inf')  # <= 0 means "no limit on seqs per batch"
  dataset = self.engine.train_data
  from EngineBatch import Batch

  # Collect all batches.
  forward_batches = []
  ":type: list[EngineBatch.Batch]"
  num_seqs = 0
  # Ask the device (remote exec) whether its forward queue still has room,
  # accounting for the num_seqs we have already collected this call.
  while self._device_exec("have_space_in_forward_data_queue", num_seqs=num_seqs):
    # Load next sequence for forwarding, keep all which are still needed for training.
    if not dataset.is_less_than_num_seqs(self.forward_current_seq):
      self.is_forwarding_finished = True
      break
    # Load from train_start_seq so sequences still needed for training stay loaded.
    dataset.load_seqs(self.train_start_seq, self.forward_current_seq + 1)
    seq_len = dataset.get_seq_length(self.forward_current_seq)
    if not forward_batches:
      forward_batches.append(Batch())
    batch = forward_batches[-1]
    # NOTE(review): dt, ds presumably = (frames per slice, num slices) after
    # adding this seq — confirm against EngineBatch.Batch.try_sequence_as_slice.
    dt, ds = batch.try_sequence_as_slice(seq_len)
    # Start a new batch if adding this seq would exceed batch_size or max_seqs
    # (a single over-long seq still goes into its own batch, hence ds > 1).
    if ds > 1 and ((dt * ds).max_value() > batch_size or ds > max_seqs):
      batch = Batch()
      forward_batches.append(batch)
    batch.add_sequence_as_slice(seq_idx=self.forward_current_seq, seq_start_frame=0, length=seq_len)
    num_seqs += 1
    self.forward_current_seq += 1

  # Forward the batches.
  from EngineUtil import assign_dev_data
  for batch in forward_batches:
    print >> log.v4, "SeqTrainParallelControl, forward %r" % batch
    # load_seqs=False: sequences were already loaded in the collection loop above.
    success = assign_dev_data(self.train_device, dataset, [batch], load_seqs=False)
    assert success, "failed to allocate & assign data"
    self.train_device.update_data()
    self._device_exec("do_forward", batch=batch)
    self._device_exec("train_check_calc_loss")
def test_assign_dev_data():
  """
  Start an ExternSprintDataset (Sprint subprocess serving DummyDataset(2,3,4)),
  take two batches from its batch generator, and check that assign_dev_data()
  copies them onto a DummyDevice.
  """
  cfg = Config()
  cfg.update(dummyconfig_dict)
  dev = DummyDevice(config=cfg)
  sprint_args = (
    "--*.feature-dimension=2 --*.trainer-output-dimension=3 "
    "--*.crnn-dataset=DummyDataset(2,3,4)")
  dataset = ExternSprintDataset(sprintExecPath, sprint_args)
  dataset.init_seq_order(epoch=1)
  assert_true(dataset.is_less_than_num_seqs(0))
  batch_gen = dataset.generate_batches(recurrent_net=False, batch_size=512)
  batches = batch_gen.peek_next_n(2)
  assert_equal(len(batches), 2)
  success, num_batches = assign_dev_data(dev, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
def forward_fill_queue(self):
  """
  Full sequence forwarding, no chunking (at the moment).
  While the device reports free space in its forward data queue, packs
  not-yet-forwarded training sequences into batches, then assigns each batch
  to the training device and triggers forwarding + loss calculation there.
  Sets self.is_forwarding_finished once the dataset is exhausted.
  """
  assert self.train_started
  if self.is_forwarding_finished:
    return

  # We will ignore max_seq_length.
  batch_size = self.config.int('batch_size', 1)
  max_seqs = self.config.int('max_seqs', -1)
  if max_seqs <= 0:
    max_seqs = float('inf')  # <= 0 means "no limit on seqs per batch"
  dataset = self.engine.train_data
  from EngineBatch import Batch

  # Collect all batches.
  forward_batches = []; ":type: list[EngineBatch.Batch]"
  num_seqs = 0
  # Ask the device (remote exec) whether its forward queue still has room,
  # accounting for the num_seqs we have already collected this call.
  while self._device_exec("have_space_in_forward_data_queue", num_seqs=num_seqs):
    # Load next sequence for forwarding, keep all which are still needed for training.
    if not dataset.is_less_than_num_seqs(self.forward_current_seq):
      self.is_forwarding_finished = True
      break
    # Load from train_start_seq so sequences still needed for training stay loaded.
    dataset.load_seqs(self.train_start_seq, self.forward_current_seq + 1)
    seq_len = dataset.get_seq_length(self.forward_current_seq)
    if not forward_batches:
      forward_batches.append(Batch())
    batch = forward_batches[-1]
    # NOTE(review): dt, ds presumably = (frames per slice, num slices) after
    # adding this seq — confirm against EngineBatch.Batch.try_sequence_as_slice.
    dt, ds = batch.try_sequence_as_slice(seq_len)
    # Start a new batch if adding this seq would exceed batch_size or max_seqs
    # (a single over-long seq still goes into its own batch, hence ds > 1).
    if ds > 1 and ((dt * ds).max_value() > batch_size or ds > max_seqs):
      batch = Batch()
      forward_batches.append(batch)
    batch.add_sequence_as_slice(seq_idx=self.forward_current_seq, seq_start_frame=0, length=seq_len)
    num_seqs += 1
    self.forward_current_seq += 1

  # Forward the batches.
  from EngineUtil import assign_dev_data
  for batch in forward_batches:
    print >> log.v4, "SeqTrainParallelControl, forward %r" % batch
    # load_seqs=False: sequences were already loaded in the collection loop above.
    success = assign_dev_data(self.train_device, dataset, [batch], load_seqs=False)
    assert success, "failed to allocate & assign data"
    self.train_device.update_data()
    self._device_exec("do_forward", batch=batch)
    self._device_exec("train_check_calc_loss")
def test_assign_dev_data():
  """
  Launch an ExternSprintDataset via the current Python interpreter
  (Sprint subprocess serving DummyDataset(2,3,4)), take two batches from its
  batch generator, and check that assign_dev_data() copies them onto a
  DummyDevice created afterwards.
  """
  cfg = Config()
  cfg.update(dummyconfig_dict)
  print("Create ExternSprintDataset")
  dataset = ExternSprintDataset(
    [sys.executable, sprintExecPath],
    "--*.feature-dimension=2 --*.trainer-output-dimension=3 --*.crnn-dataset=DummyDataset(2,3,4)")
  dataset.init_seq_order(epoch=1)
  assert_true(dataset.is_less_than_num_seqs(0))
  batch_gen = dataset.generate_batches(recurrent_net=False, batch_size=512)
  batches = batch_gen.peek_next_n(2)
  assert_equal(len(batches), 2)
  print("Create Device")
  dev = DummyDevice(config=cfg)
  success, num_batches = assign_dev_data(dev, dataset, batches)
  assert_true(success)
  assert_equal(num_batches, len(batches))
def assign_dev_data(self, device, batches):
  """
  Convenience wrapper: delegate to the module-level assign_dev_data(),
  supplying this object's dataset (self.data).
  Returns whatever the module-level function returns.
  """
  dataset = self.data
  return assign_dev_data(device, dataset, batches)