def train(self):
    """Run one data-collection pass over ``self.train_loader``.

    For each batch, runs ``do_iteration`` and, every ``opt.print_every``
    steps, logs the averaged loss/timing plus decoder gradient statistics.
    After the loop, refreshes the HTML visualizer and processes the data
    gathered by ``self.data_collector``.
    """
    print_loss, tic = 0, time()
    for i, sample in enumerate(self.train_loader):
        # Sample layout (indices 0, 4, 5, 7, 8) — presumably fixed by the
        # dataset: image, refs, ref_lens, img_path, index. TODO confirm.
        image, refs = [x.to(device) for x in [sample[0], sample[4]]]
        ref_lens, img_path, index = sample[5], sample[7], sample[8]

        batch_loss = self.do_iteration(image, refs, ref_lens, index, img_path)
        print_loss += batch_loss

        # FIX: log only on print_every boundaries so the reported loss/time
        # are true per-batch averages. The original built `info` and called
        # step_logging on every iteration while still dividing the running
        # accumulator by print_every, which is only meaningful at the
        # boundary (and is how every sibling train method logs).
        if self.collection_steps % self.opt.print_every == 0:
            info = {
                'collect/loss': print_loss / self.opt.print_every,
                'collect/time': (time() - tic) / self.opt.print_every  # avg time per batch
            }
            util.step_logging(self.logger, info, self.collection_steps)
            util.log_avg_grads(self.logger, self.dmaker,
                               self.collection_steps, name="dec")

            steps_per_epoch = len(self.train_loader)
            self.std_logger.info(
                "Chunk {} Epoch {}, {}/{}| Loss: {} | Time per batch: {} |"
                " Epoch remaining time (HH:MM:SS) {} | Elapsed time {}".
                format(
                    self.chunk + 1, self.collection_epoch, i, steps_per_epoch,
                    info['collect/loss'], info['collect/time'],
                    util.time_remaining(steps_per_epoch - i,
                                        info['collect/time']),
                    util.time_elapsed(self.start_time, time())))
            # Reset the accumulator/timer for the next logging window.
            print_loss, tic = 0, time()
        self.collection_steps += 1

    self.trainLLvisualizer.update_html()
    self.data_collector.process_collected_data()
def train(self, epoch):
    """Train the VQA model for one epoch, then evaluate and checkpoint.

    Args:
        epoch: zero-based epoch index, used for step bookkeeping, the
            remaining-time estimate, and checkpoint naming.
    """
    print("Training")
    print_loss, tic = 0, time()
    # FIX: put the model into training mode (enables dropout/batch-norm
    # updates). The sibling train methods in this file do this; it was
    # missing here, so a preceding eval pass would leave the model in
    # eval mode for the whole epoch.
    self.model.train()
    for i, sample in enumerate(self.train_loader):
        image, question, question_len, answer, captions = sample[:-3]
        image, question, captions, answer = [
            x.to(device) for x in [image, question, captions, answer]
        ]

        self.optimizer.zero_grad()

        # Forward pass
        result = self.model(image, question, captions)
        logits = result.logits

        # Get loss
        loss = self.loss_function(
            logits, answer)  # answer is coming in as double for some reason

        # Backward pass (with optional gradient noise + clipping)
        loss.backward()
        if self.opt.grad_clip:
            util.gradient_noise_and_clip(self.model.parameters(),
                                         self.opt.max_clip)
        self.optimizer.step()

        # Logging: accumulate, report averages every print_every steps.
        print_loss += loss.item()
        if self.global_step % self.opt.print_every == 0:
            info = {
                'loss': print_loss / self.opt.print_every,
                'time': (time() - tic) / self.opt.print_every  # time per step
            }
            util.step_logging(self.logger, info, self.global_step)
            util.log_avg_grads(self.logger, self.model, self.global_step)

            steps_per_epoch = len(self.train_loader)
            step = self.global_step - epoch * steps_per_epoch
            remaining_steps = steps_per_epoch * (self.opt.max_epochs -
                                                 epoch) - step
            self.std_logger.info(
                "{}, {}/{}| Loss: {} | Time per batch: {} | Epoch remaining time (HH:MM:SS) {} | "
                "Elapsed time {} | Total remaining time {}".format(
                    epoch + 1, step, steps_per_epoch, info['loss'],
                    info['time'],
                    util.time_remaining(steps_per_epoch - step, info['time']),
                    util.time_elapsed(self.start_time, time()),
                    util.time_remaining(remaining_steps, info['time'])))
            print_loss, tic = 0, time()
        self.global_step += 1

    model_score = self.evaluate(epoch + 1)
    self.save_checkpoint(epoch, model_score)
def train(self, epoch):
    """Train the caption-matching model for one epoch.

    Draws batches in lockstep from the word-match and POS-match loaders,
    builds caption features via ``compute_cap_features``, and optimizes a
    length-masked cross-entropy loss. Evaluates and checkpoints at the end.

    Args:
        epoch: zero-based epoch index used for step bookkeeping and logging.
    """
    print("Training")
    print_loss, tic = 0, time()
    self.model.train()

    # Iterate the two loaders manually, in lockstep; stop when either
    # is exhausted.
    word_iter = iter(self.word_match_loader)
    pos_iter = iter(self.pos_match_loader)
    while True:
        try:
            # FIX: `iterator.next()` is Python 2 only — Python 3 iterators
            # expose __next__ and must be advanced with the next() builtin.
            word_batch = next(word_iter)
            pos_batch = next(pos_iter)
        except StopIteration:
            break
        word_batch = [x.to(device) for x in word_batch[:-1]]
        pos_batch = [x.to(device) for x in pos_batch[:-1]]
        image, question_len, source, target, caption, q_idx_vec, pos, att, context = \
            self.compute_cap_features(word_batch, pos_batch)

        # Forward pass
        self.optimizer.zero_grad()
        logits = self.model(image, caption, pos, context, att, source,
                            q_idx_vec)
        loss = masked_CE(logits, target, question_len)

        # Backward pass (with optional gradient noise + clipping)
        loss.backward()
        if self.opt.grad_clip:
            util.gradient_noise_and_clip(self.model.parameters(),
                                         self.opt.max_clip)
        self.optimizer.step()

        # Logging: accumulate, report averages every print_every steps.
        print_loss += loss.item()
        if self.global_step % self.opt.print_every == 0:
            info = {
                'loss': print_loss / self.opt.print_every,
                'time': (time() - tic) / self.opt.print_every  # time per step
            }
            util.step_logging(self.logger, info, self.global_step)
            util.log_avg_grads(self.logger, self.model, self.global_step)

            steps_per_epoch = len(self.word_match_loader)
            step = self.global_step - epoch * steps_per_epoch
            remaining_steps = steps_per_epoch * (self.opt.max_epochs -
                                                 epoch) - step
            self.std_logger.info(
                "{}, {}/{}| Loss: {} | Time per batch: {} | Epoch remaining time (HH:MM:SS) {} | "
                "Elapsed time {} | Total remaining time {}".format(
                    epoch + 1, step, steps_per_epoch, info['loss'],
                    info['time'],
                    util.time_remaining(steps_per_epoch - step, info['time']),
                    util.time_elapsed(self.start_time, time()),
                    util.time_remaining(remaining_steps, info['time'])))
            print_loss, tic = 0, time()
        self.global_step += 1

    model_score = self.evaluate(epoch + 1)
    self.save_checkpoint(epoch, model_score)
def train_captioner(self):
    """Train the captioner for ``opt.cap_epochs`` epochs.

    Each step optimizes word-level masked cross entropy plus
    ``opt.pos_alpha`` times a POS-tag masked cross entropy, optionally
    weighted per sample when ``opt.weight_captions`` is set. Progress is
    logged every ``opt.print_every`` steps; after every epoch the
    captioner is evaluated and checkpointed.
    """
    self.captioner.train()
    for epoch in range(self.opt.cap_epochs):
        self.cap_epoch = epoch
        self.update_lr(epoch)
        self.update_ss(epoch)
        print_loss, tic = 0, time()
        print("Training captioner")

        for batch_idx, batch in enumerate(self.train_loader):
            image, source, target, caption_len, pos, weight = (
                x.to(device) for x in batch)

            # Forward pass
            self.c_optimizer.zero_grad()
            outputs = self.captioner(image, source, pos)
            word_logits, pos_logits = outputs.logits, outputs.pos_logits

            # Word + POS losses, optionally weighted per caption.
            if self.opt.weight_captions:
                w = weight.float()
                word_loss = masked_CE(word_logits, target, caption_len, w)
                pos_loss = masked_CE(pos_logits, pos, caption_len - 1, w)
            else:
                word_loss = masked_CE(word_logits, target, caption_len)
                pos_loss = masked_CE(pos_logits, pos, caption_len - 1)
            total_loss = word_loss + self.opt.pos_alpha * pos_loss

            # Backwards pass
            total_loss.backward()
            if self.opt.grad_clip:
                util.gradient_noise_and_clip(self.captioner.parameters(),
                                             self.opt.max_clip)
            self.c_optimizer.step()

            # Logging: report averages every print_every steps.
            print_loss += total_loss.item()
            if self.cap_steps % self.opt.print_every == 0:
                avg_loss = print_loss / self.opt.print_every
                avg_time = (time() - tic) / self.opt.print_every
                info = {'cap/loss': avg_loss, 'cap/time': avg_time}
                util.step_logging(self.logger, info, self.cap_steps)
                util.log_avg_grads(self.logger, self.captioner,
                                   self.cap_steps, name="cap/")

                steps_per_epoch = len(self.train_loader)
                self.std_logger.info(
                    "Chunk {} Epoch {}, {}/{}| Loss: {} | Time per batch: {} |"
                    " Epoch remaining time (HH:MM:SS) {} | Elapsed time {}"
                    .format(
                        self.chunk + 1, epoch + 1, batch_idx,
                        steps_per_epoch, avg_loss, avg_time,
                        util.time_remaining(steps_per_epoch - batch_idx,
                                            avg_time),
                        util.time_elapsed(self.start_time, time())))
                print_loss, tic = 0, time()
            self.cap_steps += 1

        model_score = self.evaluate_captioner()
        self.save_captioner(epoch, model_score)
def train(self, epoch):
    """Run one training epoch of the captioning model.

    Optimizes word-level masked cross entropy plus ``opt.pos_alpha`` times
    a POS-tag masked cross entropy, logs averaged loss/timing every
    ``opt.print_every`` steps, then evaluates and saves a checkpoint.

    Args:
        epoch: zero-based epoch index used for step bookkeeping and logging.
    """
    print("Training")
    print_loss, tic = 0, time()
    self.model.train()
    for batch_idx, sample in enumerate(self.train_loader):
        image, source, target, caption_len, refs, ref_lens, pos = sample[:-3]
        # Only the tensors the model consumes are moved to the device;
        # refs / ref_lens stay where the loader produced them.
        image, source, target, caption_len, pos = (
            t.to(device) for t in (image, source, target, caption_len, pos))

        # Forward pass
        self.optimizer.zero_grad()
        result = self.model(image, source, pos)
        logits, pos_logits = result.logits, result.pos_logits

        # Get losses
        word_loss = masked_CE(logits, target, caption_len)
        pos_loss = masked_CE(pos_logits, pos, caption_len - 1)
        total_loss = word_loss + self.opt.pos_alpha * pos_loss

        # Backward pass (with optional gradient noise + clipping)
        total_loss.backward()
        if self.opt.grad_clip:
            util.gradient_noise_and_clip(self.model.parameters(),
                                         self.opt.max_clip)
        self.optimizer.step()

        # Logging: accumulate, report averages every print_every steps.
        print_loss += total_loss.item()
        if self.global_step % self.opt.print_every == 0:
            info = {
                'loss': print_loss / self.opt.print_every,
                'time': (time() - tic) / self.opt.print_every  # time per step
            }
            util.step_logging(self.logger, info, self.global_step)
            util.log_avg_grads(self.logger, self.model, self.global_step)

            steps_per_epoch = len(self.train_loader)
            step = self.global_step - epoch * steps_per_epoch
            remaining_steps = steps_per_epoch * (self.opt.max_epochs -
                                                 epoch) - step
            self.std_logger.info(
                "{}, {}/{}| Loss: {} | Time per batch: {} | Epoch remaining time (HH:MM:SS) {} | "
                "Elapsed time {} | Total remaining time {}".format(
                    epoch + 1, step, steps_per_epoch, info['loss'],
                    info['time'],
                    util.time_remaining(steps_per_epoch - step, info['time']),
                    util.time_elapsed(self.start_time, time()),
                    util.time_remaining(remaining_steps, info['time'])))
            print_loss, tic = 0, time()
        self.global_step += 1

    model_score = self.evaluate(epoch + 1)
    self.save_checkpoint(epoch, model_score)