def train(self):
        """Train ``self.G`` for ``self.epochs`` epochs over ``self.train_data``.

        For every batch, each of the ``args.num_of_vid`` videos stacked along
        dimension 1 of the input is passed through ``self.extractor`` on
        ``self.device0``. The per-video features are concatenated along
        dimension 1, moved to ``self.device1`` and fed to ``self.G``. The loss
        is the sum of ``self.CE_loss`` over the first ``self.train_frames``
        positions along dimension 1 of the prediction.

        Side effects:
            * steps ``self.optimizerG`` once per batch;
            * appends the mean batch loss of each epoch to
              ``self.avg_G_loss_arr``;
            * halves the learning rate every ``self.lr_decay`` epochs;
            * every ``self.checkpoint`` epochs, saves ``self.G`` and a loss
              graph into ``self.save_check_dir``.

        Note:
            ``args`` is a module-level name that is not defined in this
            method; it must provide ``num_of_vid``.
        """
        for epoch in range(self.epochs):
            epoch_G_loss = 0.0
            num_batches = len(self.train_data)
            for batch_idx, batch in enumerate(self.train_data, 1):
                # The input goes to the extractor's device, the labels to the
                # device on which the loss is computed.
                videos = batch[0].to(self.device0)
                label = batch[1].to(self.device1)

                # The videos of a sample are stacked along dim 1: split them
                # and feed them to the pretrained extractor one at a time.
                # Video 0 is always extracted, then videos 1..num_of_vid-1.
                # (The previous code re-used index 0 for every video, so only
                # the first video ever contributed features.)
                per_video = [self.extractor(videos[:, 0, :, :, :, :])]
                per_video.extend(
                    self.extractor(videos[:, vid, :, :, :, :])
                    for vid in range(1, args.num_of_vid))
                features = torch.cat(per_video, dim=1).to(self.device1)

                self.optimizerG.zero_grad()
                predict = self.G(features)

                # Sum the per-frame classification losses.
                G_loss = sum(
                    self.CE_loss(predict[:, frame, :], label[:, frame])
                    for frame in range(self.train_frames))

                G_loss.backward()
                self.optimizerG.step()

                # Accumulate a plain float so no tensor history is retained.
                batch_loss = G_loss.item()
                epoch_G_loss += batch_loss
                print("===> Epoch[{}]({}/{}): G_Loss: {:.4f} ".format(
                    epoch + 1, batch_idx, num_batches, batch_loss))

                # clear gpu cache
                torch.cuda.empty_cache()

            avg_G_loss = epoch_G_loss / num_batches
            self.avg_G_loss_arr.append(avg_G_loss)

            # learning rate decay
            if (epoch + 1) % (self.lr_decay) == 0:
                for param_group in self.optimizerG.param_groups:
                    param_group['lr'] /= 2.0
                print('G: Learning rate decay: lr={}'.format(
                    self.optimizerG.param_groups[0]['lr']))

            # checkpoint snapshot
            if (epoch + 1) % self.checkpoint == 0:
                # save check point
                check_name = join(self.save_check_dir,
                                  'epoch_' + str(epoch + 1) + 'checkpoint.pkl')
                torch.save(self.G, check_name)
                # make loss graph
                check_g_name = join(self.save_check_dir,
                                    'epoch_' + str(epoch + 1) + '.png')
                make_graph(np.array(range(epoch + 1)),
                           np.array(self.avg_G_loss_arr), self.save_mname,
                           check_g_name)
# Example no. 2
# 0
    def checkpoint_snapshot(self, epoch):
        """Write a checkpoint of ``self.G`` and a label-loss graph for ``epoch``.

        Both files are written to ``self.save_check_dir`` and are named after
        the 1-based epoch number (``epoch + 1``).
        """
        epoch_tag = 'epoch_' + str(epoch + 1)

        # Save the ``self.G`` object itself.
        model_path = join(self.save_check_dir, epoch_tag + 'checkpoint.pkl')
        torch.save(self.G, model_path)

        # Plot the recorded average label losses against indices 0..epoch.
        plot_path = join(self.save_check_dir, epoch_tag + '_label-loss.png')
        epoch_axis = np.array(range(epoch + 1))
        label_losses = np.array(self.avg_G_label_loss_arr)
        make_graph(epoch_axis, label_losses, self.save_mname, plot_path)