def train(class_dist_threshold_list):
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()), lr=1.0, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    # Select the classification loss.
    if args.loss == 'CE':
        criterion = nn.CrossEntropyLoss().to(device)
    if args.loss == 'FL':
        criterion = FocalLoss(alpha=1, gamma=args.gamma).to(device)
    if args.loss == 'CBFL':
        # Compute per-class weights from the number of examples per class,
        # as used by the class-balanced (CB) focal loss.
        beta = args.beta
        effective_num = 1.0 - np.power(beta, class_num_list)
        per_cls_weights = (1.0 - beta) / np.array(effective_num)
        per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(class_num_list)
        per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
        criterion = CBFocalLoss(weight=per_cls_weights,
                                gamma=args.gamma).to(device)
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    """
    x = torch.load("./freezed_models/alexnet_p2r.ckpt.best.pth.tar")
    G.load_state_dict(x['G_state_dict'])
    F1.load_state_dict(x['F1_state_dict'])
    optimizer_f.load_state_dict(x['optimizer_f'])
    optimizer_g.load_state_dict(x['optimizer_g'])
    """
    reg_weight = args.reg
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        # Restart each data loader's iterator once it is exhausted.
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        with torch.no_grad():
            im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
            gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
            im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
            im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        if args.uda == 1:
            data = im_data_s
            target = gt_labels_s
        else:
            data = torch.cat((im_data_s, im_data_t), 0)
            target = torch.cat((gt_labels_s, gt_labels_t), 0)
        # print(data.shape)
        output = G(data)
        out1 = F1(output)
        if args.attribute is not None:
            if args.net == 'resnet34':
                reg_loss = regularizer(F1.fc3.weight, att)
            else:
                reg_loss = regularizer(F1.fc2.weight, att)
            loss = criterion(out1, target) + reg_weight * reg_loss
        else:
            reg_loss = torch.tensor(0)
            loss = criterion(out1, target)
        if args.attribute is not None:
            # Halve the regularizer weight at every save interval.
            if step % args.save_interval == 0 and step != 0:
                reg_weight = 0.5 * reg_weight
                print("Reduced Reg weight to: ", reg_weight)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                # print(loss_t.cpu().data.item())
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda,
                                   class_dist_threshold_list)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Reg: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target, step, lr,
                                             loss.data, reg_weight * reg_loss.data,
                                             -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Reg: {:.6f} Method {}\n'.\
                format(args.source, args.target, step, lr, loss.data,
                       reg_weight * reg_loss.data, args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_val, acc_val = test(target_loader_val)
            loss_test, acc_test = test(target_loader_test)
            G.train()
            F1.train()
            if acc_val >= best_acc:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            # Save a checkpoint dict bundling the model and optimizer states.
            if args.save_check:
                print('saving model')
                is_best = True if counter == 0 else False
                save_mymodel(
                    args, {
                        'step': step,
                        'arch': args.net,
                        'G_state_dict': G.state_dict(),
                        'F1_state_dict': F1.state_dict(),
                        'best_acc_test': best_acc_test,
                        'optimizer_g': optimizer_g.state_dict(),
                        'optimizer_f': optimizer_f.state_dict(),
                    }, is_best, time_stamp)
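# ---------------------------------------------------------------------------
# All of the train() variants in this file call entropy()/adentropy() without
# defining them here. The following is a minimal sketch of what they are
# assumed to compute, following the minimax-entropy (MME) formulation:
# entropy() returns a lamda-scaled entropy term to be minimized on unlabeled
# target predictions, while adentropy() returns a negated entropy term whose
# backward pass runs through a gradient-reversed classifier call, so that the
# classifier maximizes entropy while the feature extractor minimizes it. The
# reverse/eta keywords on F1 are assumptions about the classifier interface;
# some variants above also pass extra arguments (class_dist_threshold_list,
# weight_list) that would extend these signatures.
import torch
import torch.nn.functional as F


def entropy(F1, feat, lamda, eta=1.0):
    out = F.softmax(F1(feat), dim=1)
    # Scaled entropy of the unlabeled predictions; minimizing it sharpens them.
    return -lamda * torch.mean(torch.sum(out * torch.log(out + 1e-5), dim=1))


def adentropy(F1, feat, lamda, eta=1.0):
    # The classifier is assumed to apply gradient reversal when reverse=True.
    out = F.softmax(F1(feat, reverse=True, eta=eta), dim=1)
    # Negative entropy: the classifier ascends entropy, and the reversed
    # gradient makes the feature extractor descend it (the minimax game).
    return lamda * torch.mean(torch.sum(out * torch.log(out + 1e-5), dim=1))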
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()), lr=1.0, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        im_data_s.data.resize_(data_s[0].size()).copy_(data_s[0])
        gt_labels_s.data.resize_(data_s[1].size()).copy_(data_s[1])
        im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
        gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
        im_data_tu.data.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target, step, lr,
                                             loss.data, -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Method {}\n'.\
                format(args.source, args.target, step, lr, loss.data, args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_test, acc_test = test(target_loader_test)
            loss_val, acc_val = test(target_loader_val)
            G.train()
            F1.train()
            if acc_val >= best_acc:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            if args.save_check:
                print('saving model')
                torch.save(G.state_dict(),
                           os.path.join(args.checkpath,
                                        "G_iter_model_{}_{}_"
                                        "to_{}_step_{}.pth.tar".
                                        format(args.method, args.source,
                                               args.target, step)))
                torch.save(F1.state_dict(),
                           os.path.join(args.checkpath,
                                        "F1_iter_model_{}_{}_"
                                        "to_{}_step_{}.pth.tar".
                                        format(args.method, args.source,
                                               args.target, step)))
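# ---------------------------------------------------------------------------
# inv_lr_scheduler() is used by several variants but not defined in this
# file. A minimal sketch, assuming the usual inverse decay schedule
# lr(t) = init_lr * (1 + gamma * t) ** (-power), rescaled per parameter group
# by the initial learning rates captured in param_lr (e.g. 1.0 for F1 and
# per-group multipliers for G):
def inv_lr_scheduler(param_lr, optimizer, iter_num, gamma=0.0001, power=0.75,
                     init_lr=0.001):
    lr = init_lr * (1 + gamma * iter_num) ** (-power)
    for i, param_group in enumerate(optimizer.param_groups):
        # Each group decays proportionally to its initial learning rate.
        param_group['lr'] = lr * param_lr[i]
    return optimizer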
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params, lr=args.lr, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()), lr=args.lr, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    # Step decay instead of the inverse schedule used by the other variants.
    sch_g = optim.lr_scheduler.StepLR(optimizer_g, 100, 0.8)
    sch_f = optim.lr_scheduler.StepLR(optimizer_f, 100, 0.8)
    # TensorBoard
    writer = SummaryWriter(log_dir=args.checkpath)
    for step in range(all_step):
        # optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
        #                                init_lr=args.lr)
        # optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
        #                                init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        # TensorBoard: record lr
        writer.add_scalar("Others/lr", lr, step)
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        with torch.no_grad():
            im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
            gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
            im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
            im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S: {}; T: {}; Train Ep: {}; lr={} \t ' \
                        'Loss Classification: {:.6f}; Loss T: {:.6f}; ' \
                        'Method: {}\n'.format(args.source, args.target, step,
                                              lr, loss.data, -loss_t.data,
                                              args.method)
            writer.add_scalar("Loss/Entropy_loss", -loss_t.data, step)
        else:
            log_train = 'S: {}; T: {}; Train Ep: {}; lr={} \t ' \
                        'Loss Classification: {:.6f}; Method: {}\n'.\
                format(args.source, args.target, step, lr, loss.data,
                       args.method)
        # TensorBoard: record classification loss
        writer.add_scalar("Loss/Classification_loss", loss.data, step)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        sch_g.step()
        sch_f.step()
        if step % args.save_interval == 0 and step > 0:
            print('---------------------------------')
            print('Strain:')
            loss_strain, acc_strain = test(source_loader)
            writer.add_scalar("Accuracy/Source_Testing_Acc", acc_strain, step)
            print('---------------------------------')
            print('Ttrain:')
            loss_unl, acc_unl = test(target_loader_unl)
            writer.add_scalar("Accuracy/Target_Training_Acc", acc_unl, step)
            print('---------------------------------')
            print('Ttest:')
            loss_test, acc_test = test(target_loader_test)
            writer.add_scalar("Accuracy/Target_Testing_Acc", acc_test, step)
            G.train()
            F1.train()
            if acc_unl >= best_acc:
                best_acc = acc_unl
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc unl %f' % (best_acc_test, acc_unl))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_unl))
            G.train()
            F1.train()
            if args.save_check:
                print('saving model')
                torch.save(
                    G.state_dict(),
                    os.path.join(args.checkpath,
                                 "G_{}_{}.pth".format(args.method,
                                                      str(int(acc_unl)))))
                torch.save(
                    F1.state_dict(),
                    os.path.join(args.checkpath,
                                 "F1_{}_{}.pth".format(args.method,
                                                       str(int(acc_unl)))))
    writer.close()
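# ---------------------------------------------------------------------------
# test() is called with a data loader and unpacked as (loss, acc), with acc
# on a 0-100 scale (it is cast with int() for checkpoint names above). Its
# body is not in this file; a minimal sketch assuming the global G/F1 naming
# used by the surrounding variants:
import torch
import torch.nn as nn


def test(loader):
    G.eval()
    F1.eval()
    criterion = nn.CrossEntropyLoss()
    correct, total, total_loss = 0, 0, 0.0
    with torch.no_grad():
        for images, labels, *_ in loader:  # some loaders also yield paths
            images, labels = images.cuda(), labels.cuda()
            out = F1(G(images))
            total_loss += criterion(out, labels).item() * labels.size(0)
            correct += (out.max(1)[1] == labels).sum().item()
            total += labels.size(0)
    return total_loss / total, 100.0 * correct / total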
def train(self):
    self.G.train()
    self.F1.train()
    optimizer_g = optim.SGD(self.params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(self.F1.parameters()), lr=1.0, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = self.args.steps
    data_iter_s = iter(self.source_loader)
    data_iter_t = iter(self.target_loader)
    data_iter_t_unl = iter(self.target_loader_unl)
    len_train_source = len(self.source_loader)
    len_train_target = len(self.target_loader)
    len_train_target_semi = len(self.target_loader_unl)
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=self.args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=self.args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(self.target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(self.target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(self.source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        self.im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
        self.gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
        self.im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
        self.gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
        self.im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((self.im_data_s, self.im_data_t), 0)
        target = torch.cat((self.gt_labels_s, self.gt_labels_t), 0)
        output = self.G(data)
        out1 = self.F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        output = self.G(self.im_data_tu)
        loss_t = adentropy(self.F1, output, self.args.lamda)
        loss_t.backward()
        optimizer_f.step()
        optimizer_g.step()
        log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                    'Loss Classification: {:.6f} Method {}\n'.format(
                        self.args.source, self.args.target, step, lr,
                        loss.data, self.args.method)
        self.G.zero_grad()
        self.F1.zero_grad()
        if step % self.args.log_interval == 0:
            print(log_train)
        if step % self.args.save_interval == 0 and step > 0:
            self.test(self.target_loader_unl)
            self.G.train()
            self.F1.train()
            if self.args.save_check:
                print('saving model')
                torch.save(
                    self.G.state_dict(),
                    os.path.join(
                        self.args.checkpath,
                        "G_iter_model_{}_{}_to_{}_step_{}.pth.tar".format(
                            self.args.method, self.args.source,
                            self.args.target, step)))
                torch.save(
                    self.F1.state_dict(),
                    os.path.join(
                        self.args.checkpath,
                        "F1_iter_model_{}_{}_to_{}_step_{}.pth.tar".format(
                            self.args.method, self.args.source,
                            self.args.target, step)))
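# ---------------------------------------------------------------------------
# The reverse=True path assumed in the adentropy() sketch above is typically
# implemented with a gradient-reversal autograd Function inside the
# classifier, which would call grad_reverse(x, eta) before its final linear
# layer. A minimal sketch (eta scales the flipped gradient):
import torch


class GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, eta):
        ctx.eta = eta
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip and scale the gradient on its way back to the feature extractor.
        return grad_output.neg() * ctx.eta, None


def grad_reverse(x, eta=1.0):
    return GradReverse.apply(x, eta)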
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()), lr=1.0, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    # Load the saved states of the two optimizers.
    optimizer_g.load_state_dict(main_dict['optimizer_g'])
    optimizer_f.load_state_dict(main_dict['optimizer_f'])
    print("Loaded optimizer states")

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    # Select the classification loss.
    if args.loss == 'CE':
        criterion = nn.CrossEntropyLoss().to(device)
    if args.loss == 'FL':
        criterion = FocalLoss(alpha=1, gamma=1).to(device)
    if args.loss == 'CBFL':
        # Compute per-class weights from the number of examples per class,
        # as used by the class-balanced (CB) focal loss.
        beta = 0.99
        effective_num = 1.0 - np.power(beta, class_num_list)
        per_cls_weights = (1.0 - beta) / np.array(effective_num)
        per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(class_num_list)
        per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
        criterion = CBFocalLoss(weight=per_cls_weights, gamma=0.5).to(device)
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        # Restart each data loader's iterator once it is exhausted.
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        with torch.no_grad():
            im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
            gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
            im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
            im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        # Look up the per-image weights for the unlabeled images in this
        # batch via their image paths.
        img_paths = list(data_t_unl[2])
        df1 = df.loc[df['img'].isin(img_paths)]
        df1 = df1['weight']
        weight_list = list(df1)
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda, weight_list)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target, step, lr,
                                             loss.data, -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Method {}\n'.\
                format(args.source, args.target, step, lr, loss.data, args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_val, acc_val = test(target_loader_val)
            loss_test, acc_test = test(target_loader_test)
            G.train()
            F1.train()
            # Note: this variant selects the best checkpoint on test accuracy
            # rather than validation accuracy.
            if acc_test >= best_acc:
                best_acc = acc_test
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            # Save a checkpoint dict bundling the model and optimizer states.
            if args.save_check:
                print('saving model')
                is_best = True if counter == 0 else False
                save_mymodel(
                    args, {
                        'step': step,
                        'arch': args.net,
                        'G_state_dict': G.state_dict(),
                        'F1_state_dict': F1.state_dict(),
                        'best_acc_test': best_acc_test,
                        'optimizer_g': optimizer_g.state_dict(),
                        'optimizer_f': optimizer_f.state_dict(),
                    }, is_best)
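# ---------------------------------------------------------------------------
# FocalLoss / CBFocalLoss come from elsewhere in the repository. A minimal
# sketch of a weighted focal loss consistent with the call sites above; the
# alpha/weight/gamma keywords are taken from those calls, and deriving pt
# from the (possibly class-weighted) cross entropy is a common
# simplification, not necessarily the repository's exact implementation:
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLoss(nn.Module):
    def __init__(self, alpha=1.0, gamma=1.0, weight=None):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.weight = weight

    def forward(self, logits, target):
        # Per-sample cross entropy, optionally class-weighted (CB variant).
        ce = F.cross_entropy(logits, target, weight=self.weight,
                             reduction='none')
        pt = torch.exp(-ce)  # approximate probability of the true class
        # Down-weight easy examples by (1 - pt)^gamma.
        return (self.alpha * (1 - pt) ** self.gamma * ce).mean()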
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()), lr=1.0, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    # criterion = nn.CrossEntropyLoss().cuda()
    # Class-balanced focal loss: weight each class by the inverse effective
    # number of samples.
    beta = 0.99
    effective_num = 1.0 - np.power(beta, class_num_list)
    per_cls_weights = (1.0 - beta) / np.array(effective_num)
    per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(class_num_list)
    per_cls_weights = torch.FloatTensor(per_cls_weights).cuda()
    criterion = FocalLoss(weight=per_cls_weights, gamma=0.5).cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc_test = 0
    counter = 0
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        im_data_s.data.resize_(data_s[0].size()).copy_(data_s[0])
        gt_labels_s.data.resize_(data_s[1].size()).copy_(data_s[1])
        im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
        gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
        im_data_tu.data.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target, step, lr,
                                             loss.data, -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Method {}\n'.\
                format(args.source, args.target, step, lr, loss.data, args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_test, acc_test = test(target_loader_test)
            loss_val, acc_val = test(target_loader_val)
            G.train()
            F1.train()
            if acc_test > best_acc_test:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            if args.save_check:
                print('saving model...')
                is_best = True if counter == 0 else False
                save_mymodel(
                    args, {
                        'step': step,
                        'arch': args.net,
                        'G_state_dict': G.state_dict(),
                        'F1_state_dict': F1.state_dict(),
                        'best_acc_test': best_acc_test,
                        'optimizer_g': optimizer_g.state_dict(),
                        'optimizer_f': optimizer_f.state_dict(),
                    }, is_best)
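# ---------------------------------------------------------------------------
# save_mymodel() persists the checkpoint dict and is expected to keep a
# separate copy when is_best is set (the commented-out loading code above
# reads a "*.ckpt.best.pth.tar" file). Its body is not in this file; the
# following sketch is a guess at that behavior, and the filename pattern and
# optional time_stamp handling are assumptions:
import os
import shutil
import torch


def save_mymodel(args, state, is_best, time_stamp=None):
    os.makedirs(args.checkpath, exist_ok=True)
    tag = time_stamp if time_stamp is not None else 'latest'
    path = os.path.join(args.checkpath,
                        '{}_{}_to_{}_{}.ckpt.pth.tar'.format(
                            args.net, args.source, args.target, tag))
    torch.save(state, path)
    if is_best:
        # Keep a separate copy of the best checkpoint so later saves do not
        # overwrite it.
        shutil.copyfile(path, path.replace('.ckpt.', '.ckpt.best.'))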
def train():
    G.train()
    F1.train()
    F2.train()
    optimizer_g = optim.SGD(params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f1 = optim.SGD(list(F1.parameters()), lr=1.0, momentum=0.9,
                             weight_decay=0.0005, nesterov=True)
    optimizer_f2 = optim.SGD(list(F2.parameters()), lr=1.0, momentum=0.9,
                             weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f1.zero_grad()
        optimizer_f2.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f1 = []
    for param_group in optimizer_f1.param_groups:
        param_lr_f1.append(param_group["lr"])
    param_lr_f2 = []
    for param_group in optimizer_f2.param_groups:
        param_lr_f2.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    for step in range(all_step):
        # optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
        #                                init_lr=args.lr)
        # optimizer_f1 = inv_lr_scheduler(param_lr_f1, optimizer_f1, step,
        #                                 init_lr=args.lr)
        # optimizer_f2 = inv_lr_scheduler(param_lr_f2, optimizer_f2, step,
        #                                 init_lr=args.lr)
        lr = optimizer_f1.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
        gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
        im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
        gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
        im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        # target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        output_s = output[:len(im_data_s)]
        output_t = output[len(im_data_s):]
        # output_tu = output[len(im_data_s) + len(im_data_t):]
        out_1t = F1(output_t)
        out_1s = F1(output_s)
        out_2t = F2(output_t)
        out_2s = F2(output_s)
        # out_1tu = F1(output_tu)
        # out_2tu = F2(output_tu)
        #
        # pseudo_label_1 = torch.softmax(out_1tu.detach_(), dim=-1)
        # pseudo_label_2 = torch.softmax(out_2tu.detach_(), dim=-1)
        # max_probs_1, targets_u_1 = torch.max(pseudo_label_1, dim=-1)
        # max_probs_2, targets_u_2 = torch.max(pseudo_label_2, dim=-1)
        # mask = (targets_u_1 == targets_u_2).float()

        ## Source-based classifier loss: L1
        loss_1t = criterion(out_1t, gt_labels_t)
        # + (F.cross_entropy(out_1tu, targets_u_1, reduction='none') * mask).mean()
        # mask = torch.cat((torch.ones_like(gt_labels_t).float(), mask), 0)
        # loss_1t = (F.cross_entropy(torch.cat((out_1t, out_1tu), 0),
        #                            torch.cat((gt_labels_t, targets_u_1), 0),
        #                            reduction='none') * mask).mean()
        loss_1s = criterion(out_1s, gt_labels_s)
        # entropy_s = adentropy(F1, output_s, args.beta)
        # entropy_tu = adentropy(F2, output_tu, args.lamda)
        loss_1 = args.alpha * loss_1s + (1 - args.alpha) * loss_1t

        ## Target-based classifier loss
        loss_2t = criterion(out_2t, gt_labels_t)
        # + (F.cross_entropy(out_2tu, targets_u_2, reduction='none') * mask).mean()
        # loss_2t = (F.cross_entropy(torch.cat((out_2t, out_2tu), 0),
        #                            torch.cat((gt_labels_t, targets_u_2), 0),
        #                            reduction='none') * mask).mean()
        loss_2s = criterion(out_2s, gt_labels_s)
        loss_2 = args.alpha * loss_2t + (1 - args.alpha) * loss_2s
        loss_1.backward(retain_graph=True)
        loss_2.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f1.step()
        optimizer_f2.step()
        zero_grad_all()
        output = G(torch.cat((im_data_s, im_data_tu), 0))
        output_s = output[:len(im_data_s)]
        output_tu = output[len(im_data_s):]
        entropy_s = adentropy(F1, output_s, args.beta)
        entropy_tu = -adentropy(F2, output_tu, args.lamda)
        entropy = entropy_s + entropy_tu
        entropy.backward(retain_graph=True)
        optimizer_f1.step()
        # entropy_tu.backward(retain_graph=True)
        optimizer_f2.step()
        optimizer_g.step()
        log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                    'Loss_1 Classification: {:.6f} Loss_2 Classification: {:.6f} ' \
                    'Entropy_S {:.6f} Entropy_TU {:.6f}\n'.format(
                        args.source, args.target, step, lr, loss_1.data,
                        loss_2.data, entropy_s.data, entropy_tu.data)
        # G.zero_grad()
        # F1.zero_grad()
        # F2.zero_grad()
        # zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_test, acc_test = test(target_loader_test)
            loss_val, acc_val = test(target_loader_val)
            G.train()
            F1.train()
            F2.train()
            if acc_val >= best_acc:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            F2.train()
            if args.save_check:
                print('saving model')
                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.checkpath, "G_iter_model_{}_{}_"
                        "to_{}_step_{}.pth.tar".format(args.method, args.source,
                                                       args.target, step)))
                torch.save(
                    F2.state_dict(),
                    os.path.join(
                        args.checkpath, "F2_iter_model_{}_{}_"
                        "to_{}_step_{}.pth.tar".format(args.method, args.source,
                                                       args.target, step)))
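# ---------------------------------------------------------------------------
# Several variants above weight the loss by the "effective number of samples"
# (Cui et al., 2019): E_n = (1 - beta**n) / (1 - beta), with each class
# weighted proportionally to 1 / E_n and the weights normalized to sum to the
# number of classes. A small self-contained example with a hypothetical
# 3-class count list:
import numpy as np
import torch

class_num_list = [5000, 500, 50]  # hypothetical per-class sample counts
beta = 0.99
effective_num = 1.0 - np.power(beta, class_num_list)
per_cls_weights = (1.0 - beta) / effective_num
per_cls_weights = per_cls_weights / per_cls_weights.sum() * len(class_num_list)
per_cls_weights = torch.FloatTensor(per_cls_weights)
# Roughly tensor([0.66, 0.67, 1.67]): with beta = 0.99 the effective number
# saturates near 1 / (1 - beta) = 100, so the two large classes end up with
# almost equal weight while the rare class is up-weighted.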