def train(self, train_loader, save_path, finetune=False):
    # writer = SummaryWriter(log_dir="log_info")
    self.G.train(finetune=finetune)
    if finetune:
        self.optm_G = optim.Adam(filter(lambda p: p.requires_grad, self.G.parameters()), lr=5e-5)
    keep_training = True
    print("Starting training from iteration:{:d}".format(self.iter))
    s_time = time.time()
    while keep_training:
        for items in train_loader:
            gt_images, masks = self.__cuda__(*items)
            masked_images = gt_images * masks
            self.forward(masked_images, masks, gt_images)
            self.update_parameters()
            self.iter += 1
            if self.iter % 50 == 0:
                e_time = time.time()
                int_time = e_time - s_time
                print("Iteration:%d, l1_loss:%.4f, time_taken:%.2f" % (self.iter, self.l1_loss_val / 50, int_time))
                s_time = time.time()
                self.l1_loss_val = 0.0
            if self.iter % 40000 == 0:
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                save_ckpt('{:s}/g_{:d}.pth'.format(save_path, self.iter),
                          [('generator', self.G)], [('optimizer_G', self.optm_G)], self.iter)
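
# The save_ckpt helper called throughout these loops is not defined in this
# excerpt. Below is a minimal sketch of what such a helper presumably does,
# assuming the (path, [(name, model)], [(name, optimizer)], n_iter) signature
# seen in the calls above and module-level `import torch`; it is an
# illustration, not the project's actual implementation.
def save_ckpt(ckpt_name, models, optimizers, n_iter):
    ckpt_dict = {'n_iter': n_iter}
    for prefix, model in models:
        # Unwrap DataParallel, if present, before collecting weights
        net = model.module if hasattr(model, 'module') else model
        ckpt_dict[prefix] = net.state_dict()
    for prefix, optimizer in optimizers:
        ckpt_dict[prefix] = optimizer.state_dict()
    torch.save(ckpt_dict, ckpt_name)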
def train(self):
    writer = SummaryWriter(log_dir="log_info")
    self.G.train()
    if self.opt.finetune:
        # Rebuild the optimizer for fine-tuning
        self.optm_G = optim.Adam(filter(lambda p: p.requires_grad, self.G.parameters()), lr=self.lr)
    train_loader = DataLoader(dataset=self.train_dataset,
                              batch_size=self.opt.batch_size,
                              num_workers=self.opt.n_threads,
                              drop_last=True,
                              shuffle=True)
    keep_training = True
    epoch = 0
    i = self.start_iter
    print("starting training")
    s_time = time.time()
    while keep_training:
        epoch += 1
        print("epoch: {:d}".format(epoch))
        for items in train_loader:
            i += 1
            gt_images, gray_image, gt_edges, masks = self.cuda(*items)
            # masks = torch.cat([masks] * 3, dim=1)
            self.gray_image = gray_image
            masked_images = gt_images * masks
            masked_edges = gt_edges * masks[:, 0:1, :, :]
            self.forward(masked_images, masks, masked_edges, gt_images, gt_edges)
            self.update_parameters()
            if i % self.opt.log_interval == 0:
                e_time = time.time()
                int_time = e_time - s_time
                print("epoch:{:d}, iteration:{:d}".format(epoch, i),
                      ", l1_loss:", self.l1_loss / self.opt.log_interval,
                      ", time_taken:", int_time)
                writer.add_scalars("loss_val",
                                   {"l1_loss": self.l1_loss * self.opt.batch_size / self.opt.log_interval,
                                    "D_loss": self.D_loss / self.opt.log_interval,
                                    "E_loss": self.E_loss * self.opt.batch_size / self.opt.log_interval},
                                   i)
                masked_images = masked_images.cpu()
                fake_images = self.fake_B.cpu()
                fake_edges = self.edge_fake[1].cpu()
                fake_edges = torch.cat([fake_edges] * 3, dim=1)
                images = torch.cat([masked_images[0:3], fake_images[0:3], fake_edges[0:3]], dim=0)
                writer.add_images("imgs", images, i)
                s_time = time.time()
                self.l1_loss = 0.0
                self.D_loss = 0.0
                self.E_loss = 0.0
            if i % self.opt.save_interval == 0:
                save_ckpt('{:s}/ckpt/g_{:d}.pth'.format(self.opt.save_dir, i),
                          [('generator', self.G)], [('optimizer_G', self.optm_G)], i)
                if self.have_D:
                    save_ckpt('{:s}/ckpt/d_{:d}.pth'.format(self.opt.save_dir, i),
                              [('edge_D', self.edge_D)], [('optimizer_ED', self.optm_ED)], i)
    writer.close()
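
# The self.cuda(*items) / self.__cuda__(*items) helpers used by these training
# loops are assumed to simply move each tensor of a batch onto the GPU. A
# minimal sketch of such a method (assumes CUDA is available; the real helper
# may instead dispatch on a configured device):
def __cuda__(self, *args):
    return (item.cuda() for item in args)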
def train(self, train_loader, save_path, finetune=False, iters=450000, fp16=False, multi_gpu=True):
    writer = SummaryWriter()
    self.G.train(finetune=finetune)
    # Overwrite the optimizer with a lower lr when finetuning
    if finetune:
        self.optm_G = optim.Adam(filter(lambda p: p.requires_grad, self.G.parameters()),
                                 lr=self.learning_rates["finetune"])
    self.fp16 = fp16 and GOT_AMP
    if self.fp16:
        self.G, self.optm_G = amp.initialize(self.G, self.optm_G, opt_level="O1")
        if self.lossNet is not None:
            self.lossNet = amp.initialize(self.lossNet, opt_level="O1")
    if multi_gpu:
        self.multi_gpu()
    print("Starting training from iteration: {:d}, finetuning: {}".format(self.iter, finetune))
    s_time = time.time()
    while self.iter < iters:
        for items in train_loader:
            gt_images, masks = self.__cuda__(*items)
            masked_images = gt_images * masks
            self.forward(masked_images, masks, gt_images)
            self.update_parameters()
            for k, v in self.metrics["lossG"].items():
                writer.add_scalar(f"lossG/{k}", v, global_step=self.iter)
            self.iter += 1
            if self.iter % 200 == 0:
                e_time = time.time()
                int_time = e_time - s_time
                print("Iteration:%d, l1_loss:%.4f, time_taken:%.2f" % (self.iter, self.l1_loss_val / 200, int_time))
                writer.add_images("real_A", self.real_A, global_step=self.iter)
                writer.add_images("mask", self.mask, global_step=self.iter)
                writer.add_images("real_B", self.real_B, global_step=self.iter)
                writer.add_images("fake_B", self.fake_B, global_step=self.iter)
                writer.add_images("comp_B", self.comp_B, global_step=self.iter)
                # Reset the running loss and the timer
                s_time = time.time()
                self.l1_loss_val = 0.0
            if self.iter % self.save_freq == 0:
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                save_ckpt('{:s}/g_{:d}{}.pth'.format(save_path, self.iter, "_finetune" if finetune else ""),
                          [('generator', self.G)], [('optimizer_G', self.optm_G)], self.iter)
    # Save the final checkpoint once the iteration budget is reached
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    save_ckpt('{:s}/g_{:s}{}.pth'.format(save_path, "final", "_finetune" if finetune else ""),
              [('generator', self.G)], [('optimizer_G', self.optm_G)], self.iter)
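
# Hypothetical driver for the train() method above: build a DataLoader of
# (image, mask) pairs and start training. InpaintDataset, InpaintingModel, the
# paths and the hyper-parameters below are placeholders for illustration, not
# part of the original code.
if __name__ == '__main__':
    dataset = InpaintDataset(img_dir='data/train', mask_dir='data/masks')  # hypothetical dataset class
    loader = DataLoader(dataset, batch_size=6, shuffle=True, num_workers=4, drop_last=True)
    model = InpaintingModel()  # hypothetical wrapper exposing G, optm_G and train()
    model.train(loader, save_path='checkpoints', finetune=False, iters=450000)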
def main():
    image_size = [args.IMAGE_SHAPE[0], args.IMAGE_SHAPE[1]]

    if args.model_name is not None:
        model_save_dir = './snapshots/' + args.model_name + '/ckpt/'
        sample_dir = './snapshots/' + args.model_name + '/images/'
        log_dir = './logs/' + args.model_name
    else:
        model_save_dir = os.path.join(args.save_dir, 'ckpt')
        sample_dir = os.path.join(args.save_dir, 'images')
        log_dir = args.log_dir

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    with open(os.path.join(log_dir, 'config.yml'), 'w') as f:
        yaml.dump(vars(args), f)

    writer = SummaryWriter(log_dir=log_dir)

    torch.manual_seed(7777777)
    if not args.CPU:
        torch.cuda.manual_seed(7777777)

    flow_resnet = resnet_models.Flow_Branch_Multi(input_chanels=66, NoLabels=4)
    saved_state_dict = torch.load(args.RESNET_PRETRAIN_MODEL)
    # Adapt the pretrained 3-channel conv1 weights to the 66-channel input
    for i in saved_state_dict:
        if 'conv1.' in i[:7]:
            conv1_weight = saved_state_dict[i]
            conv1_weight_mean = torch.mean(conv1_weight, dim=1, keepdim=True)
            conv1_weight_new = (conv1_weight_mean / 66.0).repeat(1, 66, 1, 1)
            saved_state_dict[i] = conv1_weight_new
    flow_resnet.load_state_dict(saved_state_dict, strict=False)
    flow_resnet = nn.DataParallel(flow_resnet).cuda()
    flow_resnet.train()

    optimizer = optim.SGD(
        [{'params': get_1x_lr_params(flow_resnet.module), 'lr': args.LR},
         {'params': get_10x_lr_params(flow_resnet.module), 'lr': 10 * args.LR}],
        lr=args.LR,
        momentum=0.9,
        weight_decay=args.WEIGHT_DECAY)

    train_dataset = FlowSeq(args)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=args.n_threads)

    if args.resume:
        if args.PRETRAINED_MODEL is not None:
            resume_iter = load_ckpt(args.PRETRAINED_MODEL,
                                    [('model', flow_resnet)],
                                    [('optimizer', optimizer)],
                                    strict=True)
            print('Model Resume from', resume_iter, 'iter')
        else:
            print('Cannot load Pretrained Model')
            return

    if args.PRETRAINED:
        if args.PRETRAINED_MODEL is not None:
            resume_iter = load_ckpt(args.PRETRAINED_MODEL,
                                    [('model', flow_resnet)],
                                    strict=True)
            print('Model Resume from', resume_iter, 'iter')

    train_iterator = iter(train_loader)
    loss = {}
    start_iter = 0 if not args.resume else resume_iter

    for i in tqdm(range(start_iter, args.max_iter)):
        try:
            flow_mask_cat, flow_masked, gt_flow, mask = next(train_iterator)
        except StopIteration:
            print('Loader Restart')
            train_iterator = iter(train_loader)
            flow_mask_cat, flow_masked, gt_flow, mask = next(train_iterator)

        input_x = flow_mask_cat.cuda()
        gt_flow = gt_flow.cuda()
        mask = mask.cuda()
        flow_masked = flow_masked.cuda()

        flow1x = flow_resnet(input_x)
        f_res = flow1x[:, :2, :, :]
        r_res = flow1x[:, 2:, :, :]
        # fake_flow_f = f_res * mask[:, 10:12, :, :] + flow_masked[:, 10:12, :, :] * (1. - mask[:, 10:12, :, :])
        # fake_flow_r = r_res * mask[:, 32:34, :, :] + flow_masked[:, 32:34, :, :] * (1. - mask[:, 32:34, :, :])

        loss['1x_recon'] = L.L1_mask(f_res, gt_flow[:, :2, :, :], mask[:, 10:12, :, :])
        loss['1x_recon'] += L.L1_mask(r_res, gt_flow[:, 2:, ...], mask[:, 32:34, ...])
        loss['f_recon_hard'], new_mask = L.L1_mask_hard_mining(f_res, gt_flow[:, :2, :, :], mask[:, 10:11, :, :])
        loss['r_recon_hard'], new_mask = L.L1_mask_hard_mining(r_res, gt_flow[:, 2:, ...], mask[:, 32:33, ...])
        loss_total = loss['1x_recon'] + args.LAMBDA_HARD * (loss['f_recon_hard'] + loss['r_recon_hard'])

        if i % args.NUM_ITERS_DECAY == 0:
            adjust_learning_rate(optimizer, i, args.lr_decay_steps)
            print('LR has been changed')

        optimizer.zero_grad()
        loss_total.backward()
        optimizer.step()

        if i % args.PRINT_EVERY == 0:
            print('=========================================================')
            print(args.model_name, "Rank[{}] Iter [{}/{}]".format(0, i + 1, args.max_iter))
            print('=========================================================')
            print_loss_dict(loss)
            write_loss_dict(loss, writer, i)

        if (i + 1) % args.MODEL_SAVE_STEP == 0:
            save_ckpt(os.path.join(model_save_dir, 'DFI_%d.pth' % i),
                      [('model', flow_resnet)], [('optimizer', optimizer)], i)
            print('Model has been saved at %d Iters' % i)

    writer.close()
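
# print_loss_dict and write_loss_dict are not defined in this excerpt. A
# minimal sketch of plausible implementations, assuming `loss` maps names to
# scalar tensors and `writer` is the SummaryWriter created above; the real
# helpers may format or tag things differently.
def print_loss_dict(loss):
    for name, value in loss.items():
        print('{}: {:.4f}'.format(name, value.item()))

def write_loss_dict(loss, writer, it):
    for name, value in loss.items():
        writer.add_scalar('loss/' + name, value.item(), it)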
def train(self, train_loader, save_path, finetune=False, iters=450000, batch_size=6, batch_preload_count=1):
    # writer = SummaryWriter(log_dir="log_info")
    self.G.train(finetune=finetune)
    if finetune:
        self.optm_G = optim.Adam(filter(lambda p: p.requires_grad, self.G.parameters()), lr=5e-5)
    print("Starting training from iteration:{:d}".format(self.iter))
    s_time = time.time()
    while self.iter < iters:
        for items in train_loader:
            gt_image_batch, mask_batch, masked_image_batch = self.__cuda__(*items)
            # print("New batch of %s elements" % (items[0].size()[0]))
            for batch_idx in range(0, batch_preload_count):
                left = batch_idx * batch_size
                right = left + min(batch_size, gt_image_batch.size()[0])
                gt_image = gt_image_batch[left:right]
                mask = mask_batch[left:right]
                masked_image = masked_image_batch[left:right]
                if gt_image.size()[0] == 0:
                    break
                # print(len(train_loader), batch_idx, left, right, gt_image, mask, masked_image)
                self.forward(masked_image, mask, gt_image)
                self.update_parameters()
                self.iter += 1
                if self.iter % 50 == 0:
                    e_time = time.time()
                    int_time = e_time - s_time
                    print("Iteration:%d, l1_loss:%.4f, time_taken:%.2f" % (self.iter, self.l1_loss_val / 50, int_time))
                    s_time = time.time()
                    self.l1_loss_val = 0.0
                if self.iter % 40000 == 0:
                    if not os.path.exists(save_path):
                        os.makedirs(save_path)
                    save_ckpt('{:s}/g_{:d}.pth'.format(save_path, self.iter),
                              [('generator', self.G)], [('optimizer_G', self.optm_G)], self.iter)
                if self.iter >= iters:
                    break
            if self.iter >= iters:
                break

    print("Finished training iter %d. Saving model." % (self.iter))
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    # Save final checkpoint
    save_ckpt('{:s}/g_{:s}.pth'.format(save_path, "final"),
              [('generator', self.G)], [('optimizer_G', self.optm_G)], self.iter)
    save_ckpt('{:s}/g_{:s}_{:d}.pth'.format(save_path, "final", self.iter),
              [('generator', self.G)], [('optimizer_G', self.optm_G)], self.iter)
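
# The slicing above implies train_loader yields "super-batches" of
# batch_size * batch_preload_count samples that are then split into GPU-sized
# sub-batches. A hypothetical way to construct such a loader; the dataset,
# model object and numbers below are placeholders, not the original setup.
preload = 4
sub_batch = 6
loader = DataLoader(dataset, batch_size=sub_batch * preload, shuffle=True,
                    num_workers=4, drop_last=True)
model.train(loader, save_path='checkpoints', batch_size=sub_batch,
            batch_preload_count=preload)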
    min_lr=1e-8, verbose=True)
optimizer_rl = torch.optim.Adam(agent.parameters(), lr=p.net.lr)
scheduler_rl = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_rl, factor=1 / 2, patience=30, threshold=1e-3,
    min_lr=1e-8, verbose=True)

# create logging dirs
log_path, log_subpath = build_log_path(subj_id, p, log_root=log_root)

# save experiment params and initial weights
save_all_params(log_subpath['data'], p)
save_ckpt(0, log_subpath['ckpts'], agent, optimizer_sup)

'''task definition'''
log_freq = 200
Log_loss_critic = np.zeros(n_epoch,)
Log_loss_actor = np.zeros(n_epoch,)
Log_loss_sup = np.zeros(n_epoch,)
Log_return = np.zeros(n_epoch,)
Log_pi_ent = np.zeros(n_epoch,)
Log_acc = np.zeros((n_epoch, task.n_parts))
Log_mis = np.zeros((n_epoch, task.n_parts))
Log_dk = np.zeros((n_epoch, task.n_parts))
Log_cond = np.zeros((n_epoch, n_examples))

epoch_id = 0
for epoch_id in np.arange(epoch_id, n_epoch):
    time0 = time.time()
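
    # Note (not part of the original script): ReduceLROnPlateau schedulers such
    # as scheduler_rl only lower the learning rate when step() is called with a
    # monitored metric, presumably once per epoch in the part of this loop that
    # is not shown here, e.g.:
    #     scheduler_rl.step(Log_loss_critic[epoch_id])
    # The exact metric passed to step() is an assumption.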