def train(self):
    """Run the training loop for ``self.epochs`` epochs.

    For each batch: extracts per-video features with the frozen
    ``self.extractor`` (on ``self.device0``), concatenates them, feeds the
    result to the generator ``self.G`` (on ``self.device1``), and optimizes a
    per-frame cross-entropy loss. Also handles LR decay and periodic
    checkpoint/graph snapshots.

    Side effects: updates ``self.G`` parameters, appends to
    ``self.avg_G_loss_arr``, writes checkpoint/graph files, prints progress.
    """
    for epoch in range(self.epochs):
        epoch_G_loss = 0.0
        for batch_idx, batch in enumerate(self.train_data, 1):
            # Inputs go to the extractor's GPU, labels to the generator's GPU.
            input, label = batch[0].to(self.device0), batch[1].to(
                self.device1)

            # The batch holds args.num_of_vid videos; run each one through the
            # pretrained extractor separately and concatenate the features.
            features = self.extractor(input[:, 0, :, :, :, :])
            for i in range(1, args.num_of_vid):
                # check gpu each gpu memory
                # check_gpu_mem(0)
                # check_gpu_mem(1)
                # BUG FIX: previously re-extracted video 0 every iteration
                # (input[:, 0, ...]); index with i so each video contributes.
                features = torch.cat(
                    (features, self.extractor(input[:, i, :, :, :, :])),
                    dim=1)
            features = features.to(self.device1)

            self.optimizerG.zero_grad()
            predict = self.G(features)

            # Cross-entropy summed over every predicted frame.
            G_loss = 0
            for i in range(self.train_frames):
                G_loss += self.CE_loss(predict[:, i, :], label[:, i])
            G_loss.backward()
            self.optimizerG.step()

            # .item() instead of deprecated .data: detaches and avoids
            # keeping the graph alive via the accumulator.
            epoch_G_loss += G_loss.item()
            print("===> Epoch[{}]({}/{}): G_Loss: {:.4f} ".format(
                epoch + 1, batch_idx, len(self.train_data), G_loss.item()))

            # clear gpu cache
            torch.cuda.empty_cache()

        avg_G_loss = epoch_G_loss / len(self.train_data)
        self.avg_G_loss_arr.append(avg_G_loss)

        # Learning rate decay: halve the LR every self.lr_decay epochs.
        if (epoch + 1) % (self.lr_decay) == 0:
            for param_group in self.optimizerG.param_groups:
                param_group['lr'] /= 2.0
            print('G: Learning rate decay: lr={}'.format(
                self.optimizerG.param_groups[0]['lr']))

        # Checkpoint snapshot every self.checkpoint epochs: save the whole
        # generator module and render the loss curve so far.
        if (epoch + 1) % self.checkpoint == 0:
            check_name = join(self.save_check_dir,
                              'epoch_' + str(epoch + 1) + 'checkpoint.pkl')
            torch.save(self.G, check_name)
            check_g_name = join(self.save_check_dir,
                                'epoch_' + str(epoch + 1) + '.png')
            make_graph(np.array(range(epoch + 1)),
                       np.array(self.avg_G_loss_arr),
                       self.save_mname, check_g_name)
def checkpoint_snapshot(self, epoch):
    """Persist the generator and plot the label-loss curve for this epoch.

    Args:
        epoch: zero-based epoch index; output files are tagged with epoch + 1.
    """
    epoch_tag = 'epoch_' + str(epoch + 1)

    # Save the full generator module (architecture + weights) as a pickle.
    torch.save(self.G, join(self.save_check_dir, epoch_tag + 'checkpoint.pkl'))

    # Render the running average label-loss curve up to the current epoch.
    graph_path = join(self.save_check_dir, epoch_tag + '_label-loss.png')
    make_graph(np.array(range(epoch + 1)),
               np.array(self.avg_G_label_loss_arr),
               self.save_mname, graph_path)