# Example #1
# 0
    def train(self):
        """Run one data-collection epoch over ``self.train_loader``.

        For each batch: moves the image and reference tensors to ``device``,
        delegates the optimization step to ``self.do_iteration``, and
        accumulates the returned loss.  Every ``opt.print_every`` steps the
        windowed average loss/time are written to the step logger and stdout
        and the running counters are reset.  After the epoch, the HTML
        visualizer is refreshed and the collected data is post-processed.
        """
        print_loss, tic = 0, time()

        for i, sample in enumerate(self.train_loader):

            # sample layout: [0]=image, [4]=refs, [5]=ref lengths,
            # [7]=image path, [8]=index -- TODO confirm against the loader.
            image, refs = [x.to(device) for x in [sample[0], sample[4]]]
            ref_lens, img_path, index = sample[5], sample[7], sample[8]
            batch_loss = self.do_iteration(image, refs, ref_lens, index,
                                           img_path)

            print_loss += batch_loss

            if self.collection_steps % self.opt.print_every == 0:
                # Fix: compute and emit the averaged stats only at
                # print_every boundaries. Previously `info` was built and
                # step-logged on every iteration, so the logged value was a
                # partial sum divided by the full window size — an incorrect
                # average except exactly at print steps. This also matches
                # the other train loops in this file.
                info = {
                    'collect/loss': print_loss / self.opt.print_every,
                    'collect/time': (time() - tic) /
                    self.opt.print_every  # avg seconds per batch this window
                }
                util.step_logging(self.logger, info, self.collection_steps)
                util.log_avg_grads(self.logger,
                                   self.dmaker,
                                   self.collection_steps,
                                   name="dec")
                steps_per_epoch = len(self.train_loader)
                self.std_logger.info(
                    "Chunk {} Epoch {}, {}/{}| Loss: {} | Time per batch: {} |"
                    " Epoch remaining time (HH:MM:SS) {} | Elapsed time {}".
                    format(
                        self.chunk + 1, self.collection_epoch, i,
                        steps_per_epoch, info['collect/loss'],
                        info['collect/time'],
                        util.time_remaining(steps_per_epoch - i,
                                            info['collect/time']),
                        util.time_elapsed(self.start_time, time())))

                # Reset the logging window.
                print_loss, tic = 0, time()

            self.collection_steps += 1

        self.trainLLvisualizer.update_html()
        self.data_collector.process_collected_data()
    def train(self, epoch):
        """Run one supervised training epoch.

        Per batch: forward pass through ``self.model``, cross-entropy loss
        against the answer, backward pass with optional gradient clipping,
        and an optimizer step.  Averaged loss/time are logged every
        ``opt.print_every`` global steps.  After the epoch the model is
        evaluated and a checkpoint is written.

        Args:
            epoch: zero-based index of the current epoch.
        """
        print("Training")

        print_loss, tic = 0, time()

        for sample in self.train_loader:

            # Last three sample fields are left untouched (CPU-side).
            image, question, question_len, answer, captions = sample[:-3]
            image, question, captions, answer = (
                t.to(device) for t in (image, question, captions, answer))

            self.optimizer.zero_grad()

            # Forward pass and loss.
            logits = self.model(image, question, captions).logits
            # NOTE (original): answer is coming in as double for some reason.
            loss = self.loss_function(logits, answer)

            # Backward pass, optional clipping, parameter update.
            loss.backward()
            if self.opt.grad_clip:
                util.gradient_noise_and_clip(self.model.parameters(),
                                             self.opt.max_clip)
            self.optimizer.step()

            print_loss += loss.item()

            # Periodic logging of windowed averages.
            if self.global_step % self.opt.print_every == 0:
                avg_loss = print_loss / self.opt.print_every
                avg_time = (time() - tic) / self.opt.print_every
                info = {'loss': avg_loss, 'time': avg_time}
                util.step_logging(self.logger, info, self.global_step)
                util.log_avg_grads(self.logger, self.model, self.global_step)

                steps_per_epoch = len(self.train_loader)
                step = self.global_step - epoch * steps_per_epoch
                remaining_steps = (
                    steps_per_epoch * (self.opt.max_epochs - epoch) - step)
                self.std_logger.info(
                    "{}, {}/{}| Loss: {} | Time per batch: {} | Epoch remaining time (HH:MM:SS) {} | "
                    "Elapsed time {} | Total remaining time {}".format(
                        epoch + 1, step, steps_per_epoch, avg_loss, avg_time,
                        util.time_remaining(steps_per_epoch - step, avg_time),
                        util.time_elapsed(self.start_time, time()),
                        util.time_remaining(remaining_steps, avg_time)))
                print_loss, tic = 0, time()

            self.global_step += 1

        model_score = self.evaluate(epoch + 1)
        self.save_checkpoint(epoch, model_score)
    def train(self, epoch):
        """Run one training epoch drawing from two loaders in lockstep.

        Pulls matched batches from ``word_match_loader`` and
        ``pos_match_loader``, builds caption features via
        ``compute_cap_features``, optimizes a masked cross-entropy loss,
        logs every ``opt.print_every`` global steps, then evaluates and
        saves a checkpoint.

        Args:
            epoch: zero-based index of the current epoch.
        """
        print("Training")

        print_loss, tic = 0, time()
        self.model.train()

        # Manually iterate both datasets together; stop when either is
        # exhausted.
        word_iter = iter(self.word_match_loader)
        pos_iter = iter(self.pos_match_loader)
        while True:
            try:
                # Fix: Python 3 iterators have no .next() method — the
                # original `word_iter.next()` raised AttributeError. Use the
                # builtin next() instead (also replaced __iter__() with the
                # idiomatic iter() above).
                word_batch = next(word_iter)
                pos_batch = next(pos_iter)
            except StopIteration:
                break

            # Move all but the last field of each batch to the device
            # (last field stays on CPU — presumably metadata; confirm).
            word_batch = [x.to(device) for x in word_batch[:-1]]
            pos_batch = [x.to(device) for x in pos_batch[:-1]]

            image, question_len, source, target, caption, q_idx_vec, pos, att, context = self.compute_cap_features(
                word_batch, pos_batch)

            # Forward pass
            self.optimizer.zero_grad()
            logits = self.model(image, caption, pos, context, att, source,
                                q_idx_vec)
            loss = masked_CE(logits, target, question_len)

            # Backward pass
            loss.backward()

            if self.opt.grad_clip:
                util.gradient_noise_and_clip(self.model.parameters(),
                                             self.opt.max_clip)

            self.optimizer.step()

            # Logging: accumulate loss over the print window.
            print_loss += loss.item()

            if self.global_step % self.opt.print_every == 0:
                info = {
                    'loss': print_loss / self.opt.print_every,
                    'time':
                    (time() - tic) / self.opt.print_every  # time per step
                }

                util.step_logging(self.logger, info, self.global_step)
                util.log_avg_grads(self.logger, self.model, self.global_step)

                steps_per_epoch = len(self.word_match_loader)
                step = self.global_step - epoch * steps_per_epoch
                remaining_steps = steps_per_epoch * (self.opt.max_epochs -
                                                     epoch) - step
                self.std_logger.info(
                    "{}, {}/{}| Loss: {} | Time per batch: {} | Epoch remaining time (HH:MM:SS) {} | "
                    "Elapsed time {} | Total remaining time {}".format(
                        epoch + 1, step, steps_per_epoch, info['loss'],
                        info['time'],
                        util.time_remaining(steps_per_epoch - step,
                                            info['time']),
                        util.time_elapsed(self.start_time, time()),
                        util.time_remaining(remaining_steps, info['time'])))
                print_loss, tic = 0, time()

            self.global_step = self.global_step + 1

        model_score = self.evaluate(epoch + 1)
        self.save_checkpoint(epoch, model_score)
# Example #4
# 0
    def train_captioner(self):
        """Train the captioner for ``opt.cap_epochs`` epochs.

        Each epoch: adjusts learning rate and scheduled sampling, then for
        every batch runs a forward pass producing word and POS logits,
        combines two masked cross-entropy losses (optionally weighted by the
        per-sample ``weight`` tensor), backpropagates with optional gradient
        clipping, and steps ``self.c_optimizer``.  Windowed averages are
        logged every ``opt.print_every`` captioner steps; after each epoch
        the captioner is evaluated and checkpointed.
        """
        self.captioner.train()

        for epoch in range(self.opt.cap_epochs):
            self.cap_epoch = epoch

            # Per-epoch schedule updates (learning rate, scheduled sampling).
            self.update_lr(epoch)
            self.update_ss(epoch)

            print_loss, tic = 0, time()

            print("Training captioner")

            for i, sample in enumerate(self.train_loader):

                # Every field of the sample is moved to the compute device.
                image, source, target, caption_len, pos, weight = [
                    x.to(device) for x in sample
                ]

                # Forward pass
                self.c_optimizer.zero_grad()

                r = self.captioner(image, source, pos)
                logits, pos_logits = r.logits, r.pos_logits

                # Word-level and POS-level losses; POS targets are one step
                # shorter than the word sequence (caption_len - 1).
                if self.opt.weight_captions:
                    word_loss = masked_CE(logits, target, caption_len,
                                          weight.float())
                    pos_loss = masked_CE(pos_logits, pos, caption_len - 1,
                                         weight.float())
                else:
                    word_loss = masked_CE(logits, target, caption_len)
                    pos_loss = masked_CE(pos_logits, pos, caption_len - 1)

                # Combine losses; pos_alpha scales the POS contribution.
                total_loss = word_loss + self.opt.pos_alpha * pos_loss

                # Backwards pass
                total_loss.backward()

                if self.opt.grad_clip:
                    util.gradient_noise_and_clip(self.captioner.parameters(),
                                                 self.opt.max_clip)

                self.c_optimizer.step()

                # Logging
                print_loss += total_loss.item()

                if self.cap_steps % self.opt.print_every == 0:
                    info = {
                        'cap/loss': print_loss / self.opt.print_every,
                        'cap/time': (time() - tic) / self.opt.
                        print_every  # avg seconds per batch in this window
                    }
                    util.step_logging(self.logger, info, self.cap_steps)
                    util.log_avg_grads(self.logger,
                                       self.captioner,
                                       self.cap_steps,
                                       name="cap/")
                    steps_per_epoch = len(self.train_loader)
                    self.std_logger.info(
                        "Chunk {} Epoch {}, {}/{}| Loss: {} | Time per batch: {} |"
                        " Epoch remaining time (HH:MM:SS) {} | Elapsed time {}"
                        .format(
                            self.chunk + 1, epoch + 1, i, steps_per_epoch,
                            info['cap/loss'], info['cap/time'],
                            util.time_remaining(steps_per_epoch - i,
                                                info['cap/time']),
                            util.time_elapsed(self.start_time, time())))

                    # Reset the logging window.
                    print_loss, tic = 0, time()

                self.cap_steps += 1

            model_score = self.evaluate_captioner()
            self.save_captioner(epoch, model_score)
# Example #5
# 0
    def train(self, epoch):
        """Run one captioning training epoch.

        Per batch: forward pass producing word and POS logits, a combined
        masked cross-entropy loss, backward pass with optional gradient
        clipping, and an optimizer step.  Windowed averages are logged every
        ``opt.print_every`` global steps; after the epoch the model is
        evaluated and checkpointed.

        Args:
            epoch: zero-based index of the current epoch.
        """
        print("Training")

        print_loss, tic = 0, time()
        self.model.train()

        for sample in self.train_loader:
            # Last three sample fields are left untouched; refs/ref_lens
            # stay on CPU.
            image, source, target, caption_len, refs, ref_lens, pos = (
                sample[:-3])
            image, source, target, caption_len, pos = (
                t.to(device)
                for t in (image, source, target, caption_len, pos))

            self.optimizer.zero_grad()

            # Forward pass and the two masked losses; POS targets are one
            # step shorter than the word sequence.
            result = self.model(image, source, pos)
            word_loss = masked_CE(result.logits, target, caption_len)
            pos_loss = masked_CE(result.pos_logits, pos, caption_len - 1)
            total_loss = word_loss + self.opt.pos_alpha * pos_loss

            # Backward pass, optional clipping, parameter update.
            total_loss.backward()
            if self.opt.grad_clip:
                util.gradient_noise_and_clip(self.model.parameters(),
                                             self.opt.max_clip)
            self.optimizer.step()

            print_loss += total_loss.item()

            # Periodic logging of windowed averages.
            if self.global_step % self.opt.print_every == 0:
                avg_loss = print_loss / self.opt.print_every
                avg_time = (time() - tic) / self.opt.print_every
                info = {'loss': avg_loss, 'time': avg_time}
                util.step_logging(self.logger, info, self.global_step)
                util.log_avg_grads(self.logger, self.model, self.global_step)

                steps_per_epoch = len(self.train_loader)
                step = self.global_step - epoch * steps_per_epoch
                remaining_steps = (
                    steps_per_epoch * (self.opt.max_epochs - epoch) - step)
                self.std_logger.info(
                    "{}, {}/{}| Loss: {} | Time per batch: {} | Epoch remaining time (HH:MM:SS) {} | "
                    "Elapsed time {} | Total remaining time {}".format(
                        epoch + 1, step, steps_per_epoch, avg_loss, avg_time,
                        util.time_remaining(steps_per_epoch - step, avg_time),
                        util.time_elapsed(self.start_time, time()),
                        util.time_remaining(remaining_steps, avg_time)))
                print_loss, tic = 0, time()

            self.global_step += 1

        model_score = self.evaluate(epoch + 1)
        self.save_checkpoint(epoch, model_score)