def test_target(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir + '/source_F_val.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B_val.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_C_val.pt'
    netC.load_state_dict(torch.load(args.modelpath))
    netF.eval()
    netB.eval()
    netC.eval()

    acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
    log_str = 'Task: {}, Accuracy = {:.2f}%'.format(args.dset, acc * 100)
    args.out_file.write(log_str + '\n')
    args.out_file.flush()
    print(log_str + '\n')
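# ---------------------------------------------------------------------------
# `cal_acc` is defined elsewhere in the repo. A minimal sketch of the assumed
# behavior (the helper name, the loader's (image, label) yield format, and the
# fraction-valued return convention matching the `acc * 100` formatting above
# are all assumptions): run the frozen F/B/C stack over a loader and return
# the accuracy plus the collected outputs.
# ---------------------------------------------------------------------------
def cal_acc_sketch(loader, netF, netB, netC):
    correct, total, outs = 0, 0, []
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = netC(netB(netF(inputs.cuda())))
            preds = outputs.argmax(dim=1).cpu()
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            outs.append(outputs.cpu())
    return correct / total, torch.cat(outs, dim=0)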
def test(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u':
        netF = network.LeNetBase()  # .cuda()
    elif args.dset == 'm':
        netF = network.LeNetBase()  # .cuda()
    elif args.dset == 's':
        netF = network.DTNBase()  # .cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck)  # .cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck)  # .cuda()

    args.modelpath = args.output_dir + '/F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/B.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/C.pt'
    netC.load_state_dict(torch.load(args.modelpath))
    netF.eval()
    netB.eval()
    netC.eval()

    acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
    log_str = 'Task: {}, [DONT CARE] Accuracy = {:.2f}%'.format(args.dset, acc)
    try:
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
    except Exception:  # args.out_file may not be set when called standalone
        pass
    print(log_str + '\n')
def train(args):
    ent_loss_record = []
    gent_loss_record = []
    sent_loss_record = []
    total_loss_record = []
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u':
        netF = network.LeNetBase()  # .cuda()
    elif args.dset == 'm':
        netF = network.LeNetBase()  # .cuda()
    elif args.dset == 's':
        netF = network.DTNBase()  # .cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck)  # .cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck)  # .cuda()

    param_group = []
    learning_rate = args.lr
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netC.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    acc_init = 0
    max_iter = args.max_epoch * len(dset_loaders["train"])
    interval_iter = max_iter // 10
    iter_num = 0

    netF.train()
    netB.train()
    netC.train()

    iter_source = iter(dset_loaders["train"])
    while iter_num < max_iter:
        try:
            inputs_source, strong_inputs, target = next(iter_source)
        except StopIteration:
            iter_source = iter(dset_loaders["train"])
            inputs_source, strong_inputs, target = next(iter_source)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_source = inputs_source  # .cuda()
        outputs_source = netC(netB(netF(inputs_source)))
        total_loss = torch.tensor(0.0)  # .cuda()
        softmax_out = nn.Softmax(dim=1)(outputs_source)

        if args.ent:
            ent_loss = torch.mean(loss.Entropy(softmax_out))
            total_loss += ent_loss
            ent_loss_record.append(ent_loss.detach().cpu())
        if args.gent:
            msoftmax = softmax_out.mean(dim=0)
            gent_loss = -torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            gent_loss_record.append(gent_loss.detach().cpu())
            total_loss += gent_loss
        if args.sent:
            sent_loss = compute_aug_loss(strong_inputs, target, netC, netB, netF)
            total_loss += sent_loss
            sent_loss_record.append(sent_loss.detach().cpu())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        total_loss_record.append(total_loss.detach().cpu())

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            print(iter_num, interval_iter, max_iter)
            # netF.eval()
            # netB.eval()
            # netC.eval()
            # acc_s_tr, _ = cal_acc(dset_loaders['train'], netF, netB, netC)
            # acc_s_te, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
            # log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%/ {:.2f}%'.format(args.dset, iter_num, max_iter, acc_s_tr, acc_s_te)
            # args.out_file.write(log_str + '\n')
            # args.out_file.flush()
            # print(log_str+'\n')
            # if acc_s_te >= acc_init:
            #     acc_init = acc_s_te
            #     best_netF = netF.state_dict()
            #     best_netB = netB.state_dict()
            #     best_netC = netC.state_dict()
            # netF.train()
            # netB.train()
            # netC.train()

    best_netF = netF.state_dict()
    best_netB = netB.state_dict()
    best_netC = netC.state_dict()
    torch.save(best_netF, osp.join(args.output_dir, "F.pt"))
    torch.save(best_netB, osp.join(args.output_dir, "B.pt"))
    torch.save(best_netC, osp.join(args.output_dir, "C.pt"))

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, sharex=True, figsize=(16, 8))
    ax1.plot(list(range(len(ent_loss_record))), ent_loss_record, 'r')
    ax2.plot(list(range(len(gent_loss_record))), gent_loss_record, 'g')
    ax3.plot(list(range(len(sent_loss_record))), sent_loss_record, 'b')
    ax4.plot(list(range(len(total_loss_record))), total_loss_record, 'm')
    plt.tight_layout()
    plt.savefig(args.output_dir + '/loss.png')
    return netF, netB, netC
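# ---------------------------------------------------------------------------
# `loss.Entropy` is assumed to be the standard per-sample Shannon entropy of a
# softmax output; a minimal sketch is below. `compute_aug_loss` is not shown
# anywhere in this file: the second sketch is purely hypothetical, one
# plausible reading of its call signature (an entropy term on the
# strongly-augmented batch); the real implementation may differ.
# ---------------------------------------------------------------------------
def entropy_sketch(softmax_out, epsilon=1e-5):
    # (batch, num_classes) -> (batch,) per-sample entropy H(p)
    return -torch.sum(softmax_out * torch.log(softmax_out + epsilon), dim=1)

def compute_aug_loss_sketch(strong_inputs, target, netC, netB, netF):
    # hypothetical: mean entropy of predictions on the strongly-augmented view
    outputs = netC(netB(netF(strong_inputs)))
    softmax_out = nn.Softmax(dim=1)(outputs)
    return torch.mean(entropy_sketch(softmax_out))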
def train_target(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir + '/source_F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_C.pt'
    netC.load_state_dict(torch.load(args.modelpath))

    # freeze the classifier head: only F and B are adapted on the target domain
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    max_iter = args.max_epoch * len(dset_loaders["target"])
    interval_iter = len(dset_loaders["target"])
    # interval_iter = max_iter // args.interval
    iter_num = 0

    iter_test = iter(dset_loaders["target"])
    while iter_num < max_iter:
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except StopIteration:
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = next(iter_test)

        if inputs_test.size(0) == 1:
            continue

        if iter_num % interval_iter == 0 and args.cls_par > 0:
            netF.eval()
            netB.eval()
            mem_label = obtain_label(dset_loaders['target_te'], netF, netB, netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_test = inputs_test.cuda()
        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)

        if args.cls_par > 0:
            pred = mem_label[tar_idx]
            classifier_loss = args.cls_par * nn.CrossEntropyLoss()(outputs_test, pred)
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(outputs_test)
            entropy_loss = torch.mean(loss.Entropy(softmax_out))
            # if args.gent:
            #     msoftmax = softmax_out.mean(dim=0)
            #     entropy_loss -= torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                args.dset, iter_num, max_iter, acc)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(netF.state_dict(),
                   osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(netB.state_dict(),
                   osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(netC.state_dict(),
                   osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
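# ---------------------------------------------------------------------------
# `obtain_label` is defined elsewhere in the repo. A minimal sketch of the
# SHOT-style pseudo-labeling it is assumed to implement (the loader's triple
# yield format and the exact normalization are assumptions): collect features
# and predictions, build softmax-weighted class centroids, then relabel every
# sample by its nearest centroid under cosine distance, refining once.
# ---------------------------------------------------------------------------
import numpy as np
from scipy.spatial.distance import cdist

def obtain_label_sketch(loader, netF, netB, netC, args):
    feas, outs = [], []
    with torch.no_grad():
        for inputs, labels, _ in loader:
            f = netB(netF(inputs.cuda()))
            feas.append(f.cpu())
            outs.append(netC(f).cpu())
    all_fea = torch.cat(feas, dim=0)
    all_output = nn.Softmax(dim=1)(torch.cat(outs, dim=0))

    # append a constant 1 and L2-normalize, following the SHOT reference code
    all_fea = torch.cat((all_fea, torch.ones(all_fea.size(0), 1)), dim=1)
    all_fea = (all_fea.t() / torch.norm(all_fea, p=2, dim=1)).t().numpy()

    aff = all_output.numpy()  # (N, K) soft assignments
    for _ in range(2):  # one initialization round plus one refinement round
        initc = aff.T.dot(all_fea) / (1e-8 + aff.sum(axis=0)[:, None])  # centroids
        pred_label = cdist(all_fea, initc, 'cosine').argmin(axis=1)
        aff = np.eye(all_output.size(1))[pred_label]  # hard assignments
    return pred_label.astype('int')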
def train_source(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    param_group = []
    learning_rate = args.lr
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netC.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    acc_init = 0
    max_iter = args.max_epoch * len(dset_loaders["source_tr"])
    interval_iter = max_iter // 10
    iter_num = 0

    netF.train()
    netB.train()
    netC.train()

    iter_source = iter(dset_loaders["source_tr"])
    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except StopIteration:
            iter_source = iter(dset_loaders["source_tr"])
            inputs_source, labels_source = next(iter_source)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
        outputs_source = netC(netB(netF(inputs_source)))
        classifier_loss = loss.CrossEntropyLabelSmooth(
            num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source)

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            netC.eval()
            acc_s_tr, _ = cal_acc(dset_loaders['source_tr'], netF, netB, netC)
            acc_s_te, _ = cal_acc(dset_loaders['source_te'], netF, netB, netC)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%/ {:.2f}%'.format(
                args.dset, iter_num, max_iter, acc_s_tr, acc_s_te)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')

            if acc_s_te >= acc_init:
                acc_init = acc_s_te
                best_netF = netF.state_dict()
                best_netB = netB.state_dict()
                best_netC = netC.state_dict()

            netF.train()
            netB.train()
            netC.train()

    torch.save(best_netF, osp.join(args.output_dir, "source_F.pt"))
    torch.save(best_netB, osp.join(args.output_dir, "source_B.pt"))
    torch.save(best_netC, osp.join(args.output_dir, "source_C.pt"))

    return netF, netB, netC
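# ---------------------------------------------------------------------------
# `op_copy` and `lr_scheduler` are used throughout but defined elsewhere;
# minimal sketches of their assumed behavior, following the SHOT convention:
# op_copy stashes the initial lr in each param group, and lr_scheduler applies
# a (1 + 10 * p)^(-0.75) polynomial decay while (re)setting momentum and
# weight decay on every step. Note the MixMatch-style train() later in this
# section calls a variant that also takes an `init_lr` argument, so the exact
# signature differs between files.
# ---------------------------------------------------------------------------
def op_copy_sketch(optimizer):
    for param_group in optimizer.param_groups:
        param_group['lr0'] = param_group['lr']  # remember the initial lr
    return optimizer

def lr_scheduler_sketch(optimizer, iter_num, max_iter, gamma=10, power=0.75):
    decay = (1 + gamma * iter_num / max_iter) ** (-power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr0'] * decay
        param_group['weight_decay'] = 1e-3
        param_group['momentum'] = 0.9
        param_group['nesterov'] = True
    return optimizer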
def train(args, txt_src, txt_tgt):
    ## set pre-process
    dset_loaders = data_load(args, txt_src, txt_tgt)
    # pdb.set_trace()
    max_len = max(len(dset_loaders["source"]), len(dset_loaders["target"]))
    max_iter = args.max_epoch * max_len
    interval_iter = max_iter // 10

    if args.dset == 'u2m':
        netG = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netG = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netG = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netG.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    if args.model == 'source':
        modelpath = args.output_dir + "/source_F.pt"
        netG.load_state_dict(torch.load(modelpath))
        modelpath = args.output_dir + "/source_B.pt"
        netB.load_state_dict(torch.load(modelpath))
    else:
        modelpath = args.output_dir + "/target_F_" + args.savename + ".pt"
        netG.load_state_dict(torch.load(modelpath))
        modelpath = args.output_dir + "/target_B_" + args.savename + ".pt"
        netB.load_state_dict(torch.load(modelpath))

    netF = nn.Sequential(netB, netC)
    optimizer_g = optim.SGD(netG.parameters(), lr=args.lr * 0.1)
    optimizer_f = optim.SGD(netF.parameters(), lr=args.lr)

    base_network = nn.Sequential(netG, netF)
    source_loader_iter = iter(dset_loaders["source"])
    target_loader_iter = iter(dset_loaders["target"])

    list_acc = []
    best_ent = 100

    for iter_num in range(1, max_iter + 1):
        base_network.train()
        lr_scheduler(optimizer_g, init_lr=args.lr * 0.1, iter_num=iter_num, max_iter=max_iter)
        lr_scheduler(optimizer_f, init_lr=args.lr, iter_num=iter_num, max_iter=max_iter)

        try:
            inputs_source, labels_source = next(source_loader_iter)
        except StopIteration:
            source_loader_iter = iter(dset_loaders["source"])
            inputs_source, labels_source = next(source_loader_iter)
        try:
            inputs_target, _, target_idx = next(target_loader_iter)
        except StopIteration:
            target_loader_iter = iter(dset_loaders["target"])
            inputs_target, _, target_idx = next(target_loader_iter)

        targets_s = torch.zeros(args.batch_size, args.class_num).scatter_(
            1, labels_source.view(-1, 1), 1)
        inputs_s = inputs_source.cuda()
        targets_s = targets_s.cuda()
        inputs_t = inputs_target[0].cuda()
        inputs_t2 = inputs_target[1].cuda()

        with torch.no_grad():
            # compute guessed labels of unlabeled samples
            outputs_u = base_network(inputs_t)
            outputs_u2 = base_network(inputs_t2)
            p = (torch.softmax(outputs_u, dim=1) + torch.softmax(outputs_u2, dim=1)) / 2
            pt = p**(1 / args.T)
            targets_u = pt / pt.sum(dim=1, keepdim=True)
            targets_u = targets_u.detach()

        ####################################################################
        all_inputs = torch.cat([inputs_s, inputs_t, inputs_t2], dim=0)
        all_targets = torch.cat([targets_s, targets_u, targets_u], dim=0)
        if args.alpha > 0:
            l = np.random.beta(args.alpha, args.alpha)
            l = max(l, 1 - l)
        else:
            l = 1

        idx = torch.randperm(all_inputs.size(0))
        input_a, input_b = all_inputs, all_inputs[idx]
        target_a, target_b = all_targets, all_targets[idx]
        mixed_input = l * input_a + (1 - l) * input_b
        mixed_target = l * target_a + (1 - l) * target_b

        # interleave labeled and unlabeled samples between batches to get
        # correct batchnorm calculation
        mixed_input = list(torch.split(mixed_input, args.batch_size))
        mixed_input = utils.interleave(mixed_input, args.batch_size)
        # s  = [sa, sb, sc]
        # t1 = [t1a, t1b, t1c]
        # t2 = [t2a, t2b, t2c]
        # => s' = [sa, t1b, t2c]   t1' = [t1a, sb, t1c]   t2' = [t2a, t2b, sc]

        logits = [base_network(mixed_input[0])]
        for inp in mixed_input[1:]:
            logits.append(base_network(inp))

        # put interleaved samples back
        # [i[:,0] for i in aa]
        logits = utils.interleave(logits, args.batch_size)
        logits_x = logits[0]
        logits_u = torch.cat(logits[1:], dim=0)

        train_criterion = utils.SemiLoss()
        Lx, Lu, w = train_criterion(logits_x, mixed_target[:args.batch_size],
                                    logits_u, mixed_target[args.batch_size:],
                                    iter_num, max_iter, args.lambda_u)
        loss = Lx + w * Lu

        optimizer_g.zero_grad()
        optimizer_f.zero_grad()
        loss.backward()
        optimizer_g.step()
        optimizer_f.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            base_network.eval()

            acc, py, score, y = cal_acc(dset_loaders["train"], base_network, flag=False)
            mean_ent = torch.mean(Entropy(score))
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Mean Ent = {:.4f}'.format(
                args.dset + '_train', iter_num, max_iter, acc, mean_ent)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')

            acc, py, score, y = cal_acc(dset_loaders["test"], base_network, flag=False)
            mean_ent = torch.mean(Entropy(score))
            list_acc.append(acc)
            if best_ent > mean_ent:
                val_acc = acc
                best_ent = mean_ent
                best_y = y
                best_py = py
                best_score = score

            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Mean Ent = {:.4f}'.format(
                args.dset + '_test', iter_num, max_iter, acc, mean_ent)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')

    idx = np.argmax(np.array(list_acc))
    max_acc = list_acc[idx]
    final_acc = list_acc[-1]

    log_str = '\n==========================================\n'
    log_str += '\nVal Acc = {:.2f}\nMax Acc = {:.2f}\nFin Acc = {:.2f}\n'.format(
        val_acc, max_acc, final_acc)
    args.out_file.write(log_str + '\n')
    args.out_file.flush()

    # torch.save(base_network.state_dict(), osp.join(args.output_dir, args.log + ".pt"))
    # sio.savemat(osp.join(args.output_dir, args.log + ".mat"),
    #             {'y': best_y.cpu().numpy(), 'py': best_py.cpu().numpy(),
    #              'score': best_score.cpu().numpy()})

    return base_network, py
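# ---------------------------------------------------------------------------
# `utils.interleave` and `utils.SemiLoss` follow the MixMatch reference
# implementation; the sketches below are assumptions, not verified against the
# repo's utils.py. interleave swaps slices between the labeled batch and the
# unlabeled batches so every forward pass mixes both, keeping BatchNorm
# statistics unbiased; SemiLoss combines cross-entropy on labeled targets with
# an L2 consistency term on guessed targets, ramped up linearly by w.
# ---------------------------------------------------------------------------
import torch.nn.functional as F

def interleave_offsets(batch, nu):
    groups = [batch // (nu + 1)] * (nu + 1)
    for x in range(batch - sum(groups)):
        groups[-x - 1] += 1
    offsets = [0]
    for g in groups:
        offsets.append(offsets[-1] + g)
    return offsets

def interleave_sketch(xy, batch):
    nu = len(xy) - 1
    offsets = interleave_offsets(batch, nu)
    xy = [[v[offsets[p]:offsets[p + 1]] for p in range(nu + 1)] for v in xy]
    for i in range(1, nu + 1):
        xy[0][i], xy[i][i] = xy[i][i], xy[0][i]
    return [torch.cat(v, dim=0) for v in xy]

class SemiLossSketch(object):
    def __call__(self, outputs_x, targets_x, outputs_u, targets_u,
                 iter_num, max_iter, lambda_u):
        probs_u = torch.softmax(outputs_u, dim=1)
        # Lx: cross-entropy against the (mixed, soft) labeled targets
        Lx = -torch.mean(torch.sum(F.log_softmax(outputs_x, dim=1) * targets_x, dim=1))
        # Lu: L2 distance to the guessed targets on unlabeled data
        Lu = torch.mean((probs_u - targets_u) ** 2)
        # w ramps up linearly from 0 to lambda_u over training
        w = lambda_u * float(np.clip(iter_num / max_iter, 0.0, 1.0))
        return Lx, Lu, w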
def split_target(args):
    train_bs = args.batch_size
    if args.dset == 's2m':
        train_target = mnist.MNIST(
            './data/mnist/', train=True, download=True,
            transform=transforms.Compose([
                transforms.Resize(32),
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
        train_target2 = mnist.MNIST_twice(
            './data/mnist/', train=True, download=True,
            transform=transforms.Compose([
                transforms.Resize(32),
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
        test_target = mnist.MNIST(
            './data/mnist/', train=False, download=True,
            transform=transforms.Compose([
                transforms.Resize(32),
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
    elif args.dset == 'u2m':
        train_target = mnist.MNIST(
            './data/mnist/', train=True, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5,), (0.5,))
            ]))
        train_target2 = mnist.MNIST_twice(
            './data/mnist/', train=True, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5,), (0.5,))
            ]))
        test_target = mnist.MNIST(
            './data/mnist/', train=False, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5,), (0.5,))
            ]))
    elif args.dset == 'm2u':
        train_target = usps.USPS(
            './data/usps/', train=True, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                # transforms.Lambda(lambda x: _gaussian_blur(x, sigma=0.1)),
                transforms.Normalize((0.5,), (0.5,))
            ]))
        train_target2 = usps.USPS_twice(
            './data/usps/', train=True, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                # transforms.Lambda(lambda x: _gaussian_blur(x, sigma=0.1)),
                transforms.Normalize((0.5,), (0.5,))
            ]))
        test_target = usps.USPS(
            './data/usps/', train=False, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                # transforms.Lambda(lambda x: _gaussian_blur(x, sigma=0.1)),
                transforms.Normalize((0.5,), (0.5,))
            ]))

    dset_loaders = {}
    dset_loaders["target_te"] = DataLoader(test_target, batch_size=train_bs,
                                           shuffle=False, num_workers=args.worker,
                                           drop_last=False)
    dset_loaders["target"] = DataLoader(train_target, batch_size=train_bs,
                                        shuffle=False, num_workers=args.worker,
                                        drop_last=False)
    dset_loaders["target2"] = DataLoader(train_target2, batch_size=train_bs,
                                         shuffle=False, num_workers=args.worker,
                                         drop_last=False)

    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    if args.model == 'source':
        modelpath = args.output_dir + "/source_F.pt"
        netF.load_state_dict(torch.load(modelpath))
        modelpath = args.output_dir + "/source_B.pt"
        netB.load_state_dict(torch.load(modelpath))
        modelpath = args.output_dir + "/source_C.pt"
        netC.load_state_dict(torch.load(modelpath))
    else:
        modelpath = args.output_dir + "/target_F_" + args.savename + ".pt"
        netF.load_state_dict(torch.load(modelpath))
        modelpath = args.output_dir + "/target_B_" + args.savename + ".pt"
        netB.load_state_dict(torch.load(modelpath))
        modelpath = args.output_dir + "/target_C_" + args.savename + ".pt"
        netC.load_state_dict(torch.load(modelpath))

    netF.eval()
    netB.eval()
    netC.eval()

    start_test = True
    with torch.no_grad():
        iter_test = iter(dset_loaders['target_te'])
        for i in range(len(dset_loaders['target_te'])):
            data = next(iter_test)
            # pdb.set_trace()
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)

    top_pred, predict = torch.max(all_output, 1)
    acc = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(
        all_label.size()[0]) * 100
    mean_ent = loss.Entropy(nn.Softmax(dim=1)(all_output))

    log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Mean Ent = {:.4f}'.format(
        args.dset + '_test', 0, 0, acc, mean_ent.mean())
    args.out_file.write(log_str + '\n')
    args.out_file.flush()
    print(log_str + '\n')

    start_test = True
    with torch.no_grad():
        iter_test = iter(dset_loaders['target'])
        for i in range(len(dset_loaders['target'])):
            data = next(iter_test)
            # pdb.set_trace()
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)

    top_pred, predict = torch.max(all_output, 1)
    acc = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(
        all_label.size()[0]) * 100
    mean_ent = loss.Entropy(nn.Softmax(dim=1)(all_output))

    log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Mean Ent = {:.4f}'.format(
        args.dset + '_train', 0, 0, acc, mean_ent.mean())
    args.out_file.write(log_str + '\n')
    args.out_file.flush()
    print(log_str + '\n')

    if args.ps == 0:
        est_p = (mean_ent < mean_ent.mean()).sum().item() / mean_ent.size(0)
        log_str = 'Task: {:.2f}'.format(est_p)
        print(log_str + '\n')
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        PS = est_p
    else:
        PS = args.ps

    if args.choice == "ent":
        value = mean_ent
    elif args.choice == "maxp":
        value = -top_pred
    elif args.choice == "marginp":
        pred, _ = torch.sort(all_output, 1)
        value = pred[:, 1] - pred[:, 0]
    else:
        value = torch.rand(len(mean_ent))

    predict = predict.numpy()
    train_idx = np.zeros(predict.shape)

    # per class, keep the PS fraction of samples with the lowest selection value
    cls_k = args.class_num
    for c in range(cls_k):
        c_idx = np.where(predict == c)
        c_idx = c_idx[0]
        c_value = value[c_idx]
        _, idx_ = torch.sort(c_value)
        c_num = len(idx_)
        c_num_s = int(c_num * PS)
        # print(c, c_num, c_num_s)
        for ei in range(0, c_num_s):
            ee = c_idx[idx_[ei]]
            train_idx[ee] = 1

    train_target.targets = predict
    new_src = copy.deepcopy(train_target)
    new_tar = copy.deepcopy(train_target2)
    # pdb.set_trace()
    if args.dset == 'm2u':
        new_src.train_data = np.delete(new_src.train_data,
                                       np.where(train_idx == 0)[0], axis=0)
        new_src.train_labels = np.delete(new_src.train_labels,
                                         np.where(train_idx == 0)[0], axis=0)
        new_tar.train_data = np.delete(new_tar.train_data,
                                       np.where(train_idx == 1)[0], axis=0)
        new_tar.train_labels = np.delete(new_tar.train_labels,
                                         np.where(train_idx == 1)[0], axis=0)
    else:
        new_src.data = np.delete(new_src.data, np.where(train_idx == 0)[0], axis=0)
        new_src.targets = np.delete(new_src.targets, np.where(train_idx == 0)[0], axis=0)
        new_tar.data = np.delete(new_tar.data, np.where(train_idx == 1)[0], axis=0)
        new_tar.targets = np.delete(new_tar.targets, np.where(train_idx == 1)[0], axis=0)
    # pdb.set_trace()

    return new_src, new_tar
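# ---------------------------------------------------------------------------
# `mnist.MNIST_twice` / `usps.USPS_twice` are custom dataset classes not shown
# here. A hypothetical sketch of the assumed behavior, inferred from how
# train() unpacks inputs_target[0] / inputs_target[1]: each index yields two
# independent stochastic views of the same image plus its index. The real
# classes may differ.
# ---------------------------------------------------------------------------
from PIL import Image
from torchvision.datasets import MNIST

class MNISTTwiceSketch(MNIST):
    def __getitem__(self, index):
        img, target = self.data[index], int(self.targets[index])
        img = Image.fromarray(img.numpy(), mode='L')
        # run the transform twice to get two stochastic views
        view1 = self.transform(img) if self.transform is not None else img
        view2 = self.transform(img) if self.transform is not None else img
        return (view1, view2), target, index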
def train_target(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir + '/source_F_val.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B_val.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_C_val.pt'
    netC.load_state_dict(torch.load(args.modelpath))

    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]

    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)

    for epoch in tqdm(range(args.max_epoch), leave=False):
        iter_test = iter(dset_loaders["target"])

        netF.eval()
        netB.eval()  # was a duplicated netF.eval(); netB must be frozen here too
        mem_label = obtain_label(dset_loaders['target_te'], netF, netB, netC, args)
        mem_label = torch.from_numpy(mem_label).cuda()
        netF.train()
        netB.train()

        for _, (inputs_test, _, tar_idx) in tqdm(enumerate(iter_test), leave=False):
            if inputs_test.size(0) == 1:
                continue
            inputs_test = inputs_test.cuda()

            pred = mem_label[tar_idx]
            features_test = netB(netF(inputs_test))
            outputs_test = netC(features_test)
            classifier_loss = CrossEntropyLabelSmooth(
                num_classes=args.class_num, epsilon=0)(outputs_test, pred)

            softmax_out = nn.Softmax(dim=1)(outputs_test)
            im_loss = torch.mean(Entropy(softmax_out))
            msoftmax = softmax_out.mean(dim=0)
            im_loss -= torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            total_loss = im_loss + args.cls_par * classifier_loss

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        netF.eval()
        netB.eval()
        acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
            args.dset, epoch + 1, args.max_epoch, acc * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

    # torch.save(netF.state_dict(), osp.join(args.output_dir, "target_F.pt"))
    # torch.save(netB.state_dict(), osp.join(args.output_dir, "target_B.pt"))
    # torch.save(netC.state_dict(), osp.join(args.output_dir, "target_C.pt"))
    return netF, netB, netC
def train_source(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    param_group = []
    learning_rate = args.lr
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netC.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]

    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)

    acc_init = 0
    for epoch in tqdm(range(args.max_epoch), leave=False):
        # scheduler.step()
        netF.train()
        netB.train()
        netC.train()
        iter_source = iter(dset_loaders["source_tr"])
        for _, (inputs_source, labels_source) in tqdm(enumerate(iter_source), leave=False):
            if inputs_source.size(0) == 1:
                continue
            inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
            outputs_source = netC(netB(netF(inputs_source)))
            classifier_loss = CrossEntropyLabelSmooth(
                num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source)
            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        netF.eval()
        netB.eval()
        netC.eval()
        acc_s_tr, _ = cal_acc(dset_loaders['source_tr'], netF, netB, netC)
        acc_s_te, _ = cal_acc(dset_loaders['source_te'], netF, netB, netC)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%/ {:.2f}%'.format(
            args.dset, epoch + 1, args.max_epoch, acc_s_tr * 100, acc_s_te * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

        if acc_s_te >= acc_init:
            acc_init = acc_s_te
            best_netF = netF.state_dict()
            best_netB = netB.state_dict()
            best_netC = netC.state_dict()

    torch.save(best_netF, osp.join(args.output_dir, "source_F_val.pt"))
    torch.save(best_netB, osp.join(args.output_dir, "source_B_val.pt"))
    torch.save(best_netC, osp.join(args.output_dir, "source_C_val.pt"))

    return netF, netB, netC
def pretrain_on_source(src_data_loader, src_data_loader_eval, output_dir):
    ## set base network
    if params.mode == 'u2m':
        netF = network.LeNetBase().cuda()
    elif params.mode == 'm2u':
        netF = network.LeNetBase().cuda()
    elif params.mode == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=params.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=params.bottleneck).cuda()
    netC = network.feat_classifier(type=params.layer,
                                   class_num=params.class_num,
                                   bottleneck_dim=params.bottleneck).cuda()

    param_group = []
    learning_rate = params.lr
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netC.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]

    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)

    acc_init = 0
    out_file = open(os.path.join(output_dir, 'log_pretrain.txt'), 'w')
    for epoch in range(params.epochs):
        # scheduler.step()
        netF.train()
        netB.train()
        netC.train()
        iter_source = iter(src_data_loader)
        for _, (inputs_source, labels_source) in enumerate(iter_source):
            if inputs_source.size(0) == 1:
                continue
            inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
            outputs_source = netC(netB(netF(inputs_source)))
            classifier_loss = network.CrossEntropyLabelSmooth(
                num_classes=params.class_num, epsilon=params.smooth)(outputs_source, labels_source)
            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        netF.eval()
        netB.eval()
        netC.eval()
        acc_s_tr, _ = network.cal_acc(src_data_loader, netF, netB, netC)
        acc_s_te, _ = network.cal_acc(src_data_loader_eval, netF, netB, netC)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%/ {:.2f}%'.format(
            params.mode, epoch + 1, params.epochs, acc_s_tr * 100, acc_s_te * 100)
        out_file.write(log_str + '\n')
        out_file.flush()
        print(log_str + '\n')

        if acc_s_te >= acc_init:
            acc_init = acc_s_te
            best_netF = netF.state_dict()
            best_netB = netB.state_dict()
            best_netC = netC.state_dict()

    torch.save(best_netF, os.path.join(output_dir, "source_F_val.pt"))
    torch.save(best_netB, os.path.join(output_dir, "source_B_val.pt"))
    torch.save(best_netC, os.path.join(output_dir, "source_C_val.pt"))
    out_file.close()  # release the log file handle

    return netF, netB, netC
def train_target(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir + '/source_F_val.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B_val.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_C_val.pt'
    netC.load_state_dict(torch.load(args.modelpath))

    # Only netC (the classifier/discriminator head) is set to eval mode and
    # frozen, so its parameters do not change while the rest of the model
    # trains; this keeps h fixed in the decomposition f = g . h.
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]

    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    for epoch in tqdm(range(args.max_epoch), leave=False):
        # put the feature extractor and bottleneck in train mode
        netF.train()
        netB.train()
        iter_test = iter(dset_loaders["target"])

        # Keep a frozen copy of g for pseudo-labeling. Note that this snapshots
        # the model from the previous epoch; it is not the fixed source model.
        prev_F = copy.deepcopy(netF)
        prev_B = copy.deepcopy(netB)
        prev_F.eval()
        prev_B.eval()

        # compute the class centroids (the first equation in the paper)
        center = obtain_center(dset_loaders['target'], prev_F, prev_B, netC, args)

        for _, (inputs_test, _) in tqdm(enumerate(iter_test), leave=False):
            if inputs_test.size(0) == 1:
                continue
            inputs_test = inputs_test.cuda()

            with torch.no_grad():
                # Labels are predicted once per batch iteration; whichever
                # iteration we are in, the predictions always come from the
                # snapshot taken at the previous epoch. These two lines
                # correspond to the second equation in the paper.
                # TODO(li): the paper also has a third and a fourth equation;
                # where are those implemented?
                features_test = prev_B(prev_F(inputs_test))
                pred = obtain_label(features_test, center)

            # forward pass of the current model
            features_test = netB(netF(inputs_test))
            outputs_test = netC(features_test)

            # classification loss against the pseudo labels
            classifier_loss = CrossEntropyLabelSmooth(
                num_classes=args.class_num, epsilon=0)(outputs_test, pred)

            # IM (information-maximization) loss:
            # softmax over the class dimension (dim=1)
            softmax_out = nn.Softmax(dim=1)(outputs_test)
            # Entropy computes -sum(softmax_out * log(softmax_out), dim=1),
            # i.e. the per-sample entropy; its batch mean is L_ent in the paper
            im_loss = torch.mean(Entropy(softmax_out))
            # msoftmax is the batch-averaged probability of each class,
            # p^k in the paper
            msoftmax = softmax_out.mean(dim=0)
            # the -= together with the minus sign inside the sum amounts to +=;
            # this sum over the K classes is L_div in the paper
            im_loss -= torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            # args.par is the hyperparameter balancing the IM loss against the
            # classifier loss
            total_loss = im_loss + args.par * classifier_loss

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        netF.eval()
        netB.eval()
        acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
            args.dset, epoch + 1, args.max_epoch, acc * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

    # torch.save(netF.state_dict(), osp.join(args.output_dir, "target_F.pt"))
    # torch.save(netB.state_dict(), osp.join(args.output_dir, "target_B.pt"))
    # torch.save(netC.state_dict(), osp.join(args.output_dir, "target_C.pt"))
    return netF, netB, netC
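# ---------------------------------------------------------------------------
# `obtain_center` and the feature-level `obtain_label` used above are defined
# elsewhere; minimal sketches of the centroid-based pseudo-labeling they are
# assumed to implement (signatures inferred from the call sites): centroids
# are softmax-weighted means of features (the first equation in the paper),
# and labels are assigned by nearest centroid under cosine similarity.
# ---------------------------------------------------------------------------
def obtain_center_sketch(loader, netF, netB, netC, args):
    feas, probs = [], []
    with torch.no_grad():
        for inputs, _ in loader:
            f = netB(netF(inputs.cuda()))
            feas.append(f)
            probs.append(nn.Softmax(dim=1)(netC(f)))
    feas = torch.cat(feas, dim=0)    # (N, D)
    probs = torch.cat(probs, dim=0)  # (N, K)
    # (K, D) centroids: probability-weighted feature means
    return probs.t() @ feas / (probs.sum(dim=0, keepdim=True).t() + 1e-8)

def obtain_label_sketch(features, center):
    # cosine similarity of each feature to each centroid; argmax is the label
    f = features / (features.norm(dim=1, keepdim=True) + 1e-8)
    c = center / (center.norm(dim=1, keepdim=True) + 1e-8)
    return (f @ c.t()).argmax(dim=1)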
def train_source(args):
    """Train the source-domain model.

    The base network is LeNet -- a surprisingly old (1994) and very simple
    model.
    """
    # build the DataLoaders
    dset_loaders = digit_load(args)
    # set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    # args.bottleneck sets the output dimension of the bottleneck linear layer;
    # args.classifier decides whether BatchNorm and Dropout follow it
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    # args.layer decides whether torch.nn.utils.weight_norm is applied
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    param_group = []
    # parameters are collected into per-parameter groups by hand, just like
    # the Faster R-CNN code
    learning_rate = args.lr
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netC.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]

    # SGD with momentum
    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)

    acc_init = 0
    for epoch in tqdm(range(args.max_epoch), leave=False):
        # scheduler.step()
        # set everything to train mode
        netF.train()
        netB.train()
        netC.train()
        # one pass over the source training data
        iter_source = iter(dset_loaders["source_tr"])
        for _, (inputs_source, labels_source) in tqdm(enumerate(iter_source), leave=False):
            if inputs_source.size(0) == 1:
                continue
            inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
            # straightforward: C(B(F(x)))
            outputs_source = netC(netB(netF(inputs_source)))
            classifier_loss = CrossEntropyLabelSmooth(
                num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source)
            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        # after each pass over the data, switch to eval mode
        netF.eval()
        netB.eval()
        netC.eval()
        # accuracy on the source training and test splits
        acc_s_tr, _ = cal_acc(dset_loaders['source_tr'], netF, netB, netC)
        acc_s_te, _ = cal_acc(dset_loaders['source_te'], netF, netB, netC)
        # log the accuracies
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%/ {:.2f}%'.format(
            args.dset, epoch + 1, args.max_epoch, acc_s_tr * 100, acc_s_te * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

        if acc_s_te >= acc_init:
            # if this epoch beats all previous ones, record the accuracy and
            # the model weights
            acc_init = acc_s_te
            best_netF = netF.state_dict()
            best_netB = netB.state_dict()
            best_netC = netC.state_dict()

    # save the best weights
    torch.save(best_netF, osp.join(args.output_dir, "source_F_val.pt"))
    torch.save(best_netB, osp.join(args.output_dir, "source_B_val.pt"))
    torch.save(best_netC, osp.join(args.output_dir, "source_C_val.pt"))

    return netF, netB, netC
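# ---------------------------------------------------------------------------
# `CrossEntropyLabelSmooth` follows the SHOT reference code; a minimal sketch
# (assumed, not verified against the repo's loss module): one-hot targets are
# smoothed toward the uniform distribution by epsilon before the usual
# cross-entropy with log-softmax outputs.
# ---------------------------------------------------------------------------
class CrossEntropyLabelSmoothSketch(nn.Module):
    def __init__(self, num_classes, epsilon=0.1):
        super(CrossEntropyLabelSmoothSketch, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        # one-hot encode, then smooth toward uniform
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        return (-targets * log_probs).mean(0).sum()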
def train_target_rot(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netR = network.feat_classifier(type='linear', class_num=4,
                                   bottleneck_dim=2 * args.bottleneck).cuda()

    args.modelpath = args.output_dir + '/source_F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B.pt'
    netB.load_state_dict(torch.load(args.modelpath))

    netF.eval()
    for k, v in netF.named_parameters():
        v.requires_grad = False
    netB.eval()
    for k, v in netB.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netR.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]
    netR.train()
    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    max_iter = args.max_epoch * len(dset_loaders["target"])
    interval_iter = max_iter // 10
    iter_num = 0
    rot_acc = 0

    iter_test = iter(dset_loaders["target"])
    while iter_num < max_iter:
        optimizer.zero_grad()
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except StopIteration:
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = next(iter_test)

        if inputs_test.size(0) == 1:
            continue

        inputs_test = inputs_test.cuda()
        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        r_labels_target = np.random.randint(0, 4, len(inputs_test))
        r_inputs_target = rotation.rotate_batch_with_labels(inputs_test, r_labels_target)
        r_labels_target = torch.from_numpy(r_labels_target).cuda()
        r_inputs_target = r_inputs_target.cuda()

        f_outputs = netB(netF(inputs_test))
        f_r_outputs = netB(netF(r_inputs_target))
        r_outputs_target = netR(torch.cat((f_outputs, f_r_outputs), 1))

        rotation_loss = nn.CrossEntropyLoss()(r_outputs_target, r_labels_target)
        rotation_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netR.eval()
            acc_rot = cal_acc_rot(dset_loaders['target'], netF, netB, netR)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                args.dset, iter_num, max_iter, acc_rot)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netR.train()

            if rot_acc < acc_rot:
                rot_acc = acc_rot
                best_netR = netR.state_dict()

    log_str = 'Best Accuracy = {:.2f}%'.format(rot_acc)
    args.out_file.write(log_str + '\n')
    args.out_file.flush()
    print(log_str + '\n')

    return best_netR, rot_acc
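# ---------------------------------------------------------------------------
# `rotation.rotate_batch_with_labels` is assumed to rotate each image in the
# batch by label * 90 degrees (labels drawn from {0, 1, 2, 3} above); a
# minimal sketch of that assumed behavior:
# ---------------------------------------------------------------------------
def rotate_batch_with_labels_sketch(batch, labels):
    # batch: (B, C, H, W); rotate each (C, H, W) image k quarter-turns in the
    # spatial plane
    rotated = [torch.rot90(img, int(k), dims=(1, 2)) for img, k in zip(batch, labels)]
    return torch.stack(rotated)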