def cal_acc(loader, netF, netB, netC, flag=False):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    if flag:
        # per-class accuracy from the confusion matrix (rows restricted to classes present in the labels)
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        matrix = matrix[np.unique(all_label).astype(int), :]
        acc = matrix.diagonal() / matrix.sum(axis=1) * 100
        aacc = acc.mean()
        aa = [str(np.round(i, 2)) for i in acc]
        acc = ' '.join(aa)
        return aacc, acc
    else:
        return accuracy * 100, mean_ent
def cal_acc(loader, netF, netB, netC, flag=False):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    if flag:
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        acc = matrix.diagonal() / matrix.sum(axis=1)
        return np.mean(acc), acc
    else:
        return accuracy, mean_ent
def cal_acc(loader, netF, netB, netC):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    all_output = nn.Softmax(dim=1)(all_output)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()
    return accuracy * 100, mean_ent
def cal_acc(loader, net, flag=False):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            _, outputs = net(inputs)
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    all_output = nn.Softmax(dim=1)(all_output)
    # normalized per-sample entropy (not used below)
    ent = torch.sum(-all_output * torch.log(all_output + args.epsilon), dim=1) / np.log(all_output.size(1))
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    # all_output already holds softmax probabilities, so do not apply Softmax a second time
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()
    return accuracy, mean_ent
def get_ent(oh_final_f):
    oh_final_f = torch.from_numpy(oh_final_f)
    all_output = nn.Softmax(dim=1)(oh_final_f)
    out_ent = loss.Entropy(all_output)
    mean_ent = torch.mean(out_ent)
    out_ent_arr = out_ent.cpu().numpy()
    return mean_ent, out_ent_arr
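# The snippets in this collection call loss.Entropy(...) without defining it. A minimal
# sketch of what such a helper typically looks like (an assumption about the shared
# utility, not the exact implementation used by these repositories): the per-sample
# Shannon entropy of a row of softmax probabilities.
def Entropy(input_):
    epsilon = 1e-5
    entropy = -input_ * torch.log(input_ + epsilon)
    entropy = torch.sum(entropy, dim=1)
    return entropy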
def train(args, config, model, ad_net, random_layer, train_loader, train_loader1, optimizer, optimizer_ad, epoch):
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    if len_source > len_target:
        num_iter = len_source
    else:
        num_iter = len_target
    total_loss = 0
    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        data_source, label_source = next(iter_source)
        data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, label_target = next(iter_target)
        data_target = data_target.cuda()
        optimizer.zero_grad()
        optimizer_ad.zero_grad()
        feature_source, output_source = model(data_source)
        feature_target, output_target = model(data_target)
        feature = torch.cat((feature_source, feature_target), 0)
        output = torch.cat((output_source, output_target), 0)
        labels_target_fake = torch.max(nn.Softmax(dim=1)(output_target), 1)[1]
        labels = torch.cat((label_source, labels_target_fake))
        loss = nn.CrossEntropyLoss()(output.narrow(0, 0, data_source.size(0)), label_source)
        softmax_output = nn.Softmax(dim=1)(output)
        if epoch > 0:
            entropy = loss_func.Entropy(softmax_output)
            loss += loss_func.CDAN([feature, softmax_output], ad_net, entropy,
                                   network.calc_coeff(num_iter * (epoch - 0) + batch_idx), random_layer)
            mdd_loss = args.mdd_weight * loss_func.mdd_digit(feature, labels, args.left_weight,
                                                             args.right_weight, args.weight)
            loss = loss + mdd_loss
        total_loss += loss.data
        loss.backward()
        optimizer.step()
        if epoch > 0:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item()))
    log_str = "total_loss:{}\n".format(total_loss)
    config["out_file"].write(log_str)
    config["out_file"].flush()
    print(log_str)
def cal_acc(loader, netF=None, netB=None, netC=None, per_class_flag=False, visda_flag=False):
    """Calculate model accuracy on a validation or testing set.

    :param loader: dataloader
    :param netF: feature extractor network
    :param netB: bottleneck network
    :param netC: classifier network
    :param per_class_flag: if True, calculate the per-class average accuracy
    :param visda_flag: if True, also return the accuracy of each class
    :return: overall acc, per-class average acc, str: acc of each class, mean entropy
    """
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    all_output = nn.Softmax(dim=1)(all_output)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    accuracy *= 100  # overall accuracy
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()  # average entropy of the predictions
    if per_class_flag:
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        per_cls_acc_vec = matrix.diagonal() / matrix.sum(axis=1) * 100
        per_cls_avg_acc = per_cls_acc_vec.mean()  # per-class average accuracy
        per_cls_acc_list = [str(np.round(i, 2)) for i in per_cls_acc_vec]
        acc_each_cls = ' '.join(per_cls_acc_list)  # str: accuracy of each class
    if visda_flag:
        # For VisDA, return the accuracy of each class so it can be printed:
        # overall acc, acc of each class (str), per-class avg acc
        return accuracy, acc_each_cls, per_cls_avg_acc
    elif per_class_flag:
        # For Office-Home and DomainNet, no need to return the accuracy of each class:
        # overall acc, per-class avg acc, mean entropy
        return accuracy, per_cls_avg_acc, mean_ent
    else:
        # overall acc, mean entropy
        return accuracy, mean_ent
def train_target(args):
    dset_loaders = data_load(args)
    param_group = []
    model_resnet = network.Res50().cuda()
    for k, v in model_resnet.named_parameters():
        if k.__contains__('fc'):
            v.requires_grad = False
        else:
            param_group += [{'params': v, 'lr': args.lr}]
    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)

    for epoch in tqdm(range(args.max_epoch), leave=False):
        # refresh pseudo-labels for the whole target set before each epoch
        model_resnet.eval()
        mem_label = obtain_label(dset_loaders['test'], model_resnet, args)
        mem_label = torch.from_numpy(mem_label).cuda()
        model_resnet.train()

        iter_test = iter(dset_loaders['target'])
        for _, (inputs_test, _, tar_idx) in tqdm(enumerate(iter_test), leave=False):
            if inputs_test.size(0) == 1:
                continue
            inputs_test = inputs_test.cuda()
            pred = mem_label[tar_idx]
            features_test, outputs_test = model_resnet(inputs_test)

            classifier_loss = loss.CrossEntropyLabelSmooth(num_classes=args.class_num, epsilon=0)(outputs_test, pred)
            classifier_loss *= args.cls_par
            if args.ent:
                softmax_out = nn.Softmax(dim=1)(outputs_test)
                entropy_loss = torch.mean(loss.Entropy(softmax_out))
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    gentropy_loss = torch.sum(-msoftmax * torch.log(msoftmax + args.epsilon))
                    entropy_loss -= gentropy_loss
                classifier_loss += entropy_loss * args.ent_par

            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        model_resnet.eval()
        acc, ment = cal_acc(dset_loaders['test'], model_resnet)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(args.dset, epoch + 1, args.max_epoch, acc * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

    # torch.save(model_resnet.state_dict(), osp.join(args.output_dir, 'target.pt'))
    return model_resnet
def cal_acc(loader, netF, netB, netC, flag=False, threshold=0.1):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    if flag:
        all_output = nn.Softmax(dim=1)(all_output)
        ent = torch.sum(-all_output * torch.log(all_output + args.epsilon), dim=1) / np.log(args.class_num)
        # predict[ent > threshold] = args.class_num
        # from sklearn.mixture import GaussianMixture as GMM
        # gmm = GMM(n_components=2, random_state=0).fit(ent.reshape(-1, 1))
        # labels = gmm.predict(ent.reshape(-1, 1))
        from sklearn.cluster import KMeans
        kmeans = KMeans(2, random_state=0).fit(ent.reshape(-1, 1))
        labels = kmeans.predict(ent.reshape(-1, 1))
        idx = np.where(labels == 1)[0]
        iidx = 0
        if ent[idx].mean() > ent.mean():
            iidx = 1
        # samples in the high-entropy cluster are treated as the unknown class
        predict[np.where(labels == iidx)[0]] = args.class_num
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        matrix = matrix[np.unique(all_label).astype(int), :]
        acc = matrix.diagonal() / matrix.sum(axis=1) * 100
        unknown_acc = acc[-1:].item()
        # accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0]) * 100
        # print(np.mean(acc[:-1]), np.mean(acc), unknown_acc)
        return np.mean(acc[:-1]), np.mean(acc), unknown_acc
    else:
        return accuracy, mean_ent
def train(dataloader_src, dataloader_tgt, discriminator, classifier, train_epochs, writer):
    discriminator.train()
    classifier.train()
    loss_clf = nn.CrossEntropyLoss()
    # Reminder: momentum speeds the update up when the previous update direction agrees
    # with the current gradient; weight_decay is L2 regularization.
    optimizer = optim.SGD(itertools.chain(classifier.parameters(), discriminator.parameters()),
                          lr=1e-3, momentum=0.9, weight_decay=0.0009)
    loss_clf_ = transfer_loss = 0
    for epoch in range(train_epochs):
        for (imgs_src, labels_src), (imgs_tgt, labels_tgt) in zip(dataloader_src, dataloader_tgt):
            imgs_src = Variable(imgs_src.type(FloatTensor)).reshape(imgs_src.shape[0], -1)
            labels_src = Variable(labels_src.type(LongTensor))
            imgs_tgt = Variable(imgs_tgt.type(FloatTensor)).reshape(imgs_tgt.shape[0], -1)
            labels_tgt = Variable(labels_tgt.type(FloatTensor))

            # forward both domains through the classifier
            fea_src, pred_src = classifier(imgs_src)
            fea_tgt, pred_tgt = classifier(imgs_tgt)
            fea = torch.cat((fea_src, fea_tgt), 0)
            pred = torch.cat((pred_src, pred_tgt), 0)
            # class probabilities
            softmax_out = nn.Softmax(dim=1)(pred)
            # entropy and the discriminator (transfer) loss
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([fea, softmax_out], discriminator, entropy, networks.calc_coeff(epoch))
            # classifier loss on the source domain
            loss_clf_ = loss_clf(pred_src, labels_src)
            with OptimizerManager([optimizer]):
                total_loss = transfer_loss + loss_clf_
                total_loss.backward()

        if epoch % 5 == 0:
            acc_src, acc_tgt = evaluate(classifier, dataloader_src, dataloader_tgt)
            writer.add_scalar('Train/loss_c_src', loss_clf_, epoch)
            writer.add_scalar('Train/transfer_loss', transfer_loss, epoch)
            writer.add_scalar('Evaluate/Acc_src', acc_src, epoch)
            writer.add_scalar('Evaluate/Acc_tgt', acc_tgt, epoch)
def train(args, model, ad_net, random_layer, train_loader, train_loader1, optimizer, optimizer_ad,
          epoch, start_epoch, method):
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    if len_source > len_target:
        num_iter = len_source
    else:
        num_iter = len_target
    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        data_source, label_source = next(iter_source)
        data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, label_target = next(iter_target)
        data_target = data_target.cuda()
        optimizer.zero_grad()
        optimizer_ad.zero_grad()
        feature_source, output_source = model(data_source)
        feature_target, output_target = model(data_target)
        feature = torch.cat((feature_source, feature_target), 0)
        output = torch.cat((output_source, output_target), 0)
        labels_target_fake = torch.max(nn.Softmax(dim=1)(output_target), 1)[1]
        labels = torch.cat((label_source, labels_target_fake))
        loss = nn.CrossEntropyLoss()(output.narrow(0, 0, data_source.size(0)), label_source)
        softmax_output = nn.Softmax(dim=1)(output)
        if epoch > start_epoch:
            entropy = loss_func.Entropy(softmax_output)
            loss += loss_func.CDAN([feature, softmax_output], ad_net, entropy,
                                   network.calc_coeff(num_iter * (epoch - start_epoch) + batch_idx), random_layer)
            loss = loss + args.mdd_weight * loss_func.mdd_digit(feature, labels) \
                   + args.entropic_weight * loss_func.EntropicConfusion(feature)
        loss.backward()
        optimizer.step()
        if epoch > start_epoch:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.4f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item()))
def train(args, model, ad_net, random_layer, train_loader, train_loader1, optimizer, optimizer_ad,
          epoch, start_epoch, method):
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    if len_source > len_target:
        num_iter = len_source
    else:
        num_iter = len_target
    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        data_source, label_source = next(iter_source)
        data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, label_target = next(iter_target)
        data_target = data_target.cuda()
        optimizer.zero_grad()
        optimizer_ad.zero_grad()
        feature, output = model(torch.cat((data_source, data_target), 0))
        loss = nn.CrossEntropyLoss()(output.narrow(0, 0, data_source.size(0)), label_source)
        softmax_output = nn.Softmax(dim=1)(output)
        if epoch > start_epoch:
            if method == 'CDAN-E':
                entropy = loss_func.Entropy(softmax_output)
                loss += loss_func.CDAN([feature, softmax_output], ad_net, entropy,
                                       network.calc_coeff(num_iter * (epoch - start_epoch) + batch_idx), random_layer)
            elif method == 'CDAN':
                loss += loss_func.CDAN([feature, softmax_output], ad_net, None, None, random_layer)
            elif method == 'DANN':
                loss += loss_func.DANN(feature, ad_net)
            else:
                raise ValueError('Method cannot be recognized.')
        loss.backward()
        optimizer.step()
        if epoch > start_epoch:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item()))
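# Several of the training loops here pass network.calc_coeff(...) as the ramp-up
# coefficient of the adversarial loss. A minimal sketch under the usual assumption of a
# sigmoid-shaped schedule from `low` to `high` (the default values are hypothetical and
# may differ from the helper actually used by these repositories):
def calc_coeff(iter_num, high=1.0, low=0.0, alpha=10.0, max_iter=10000.0):
    return float(2.0 * (high - low) / (1.0 + np.exp(-alpha * iter_num / max_iter))
                 - (high - low) + low)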
def cal_acc(loader, netF, netB, netC, flag=False):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    all_output = nn.Softmax(dim=1)(all_output)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()
    if flag:
        # all_output already holds softmax probabilities; compute the normalized per-sample entropy
        ent = torch.sum(-all_output * torch.log(all_output + args.epsilon), dim=1) / np.log(args.class_num)
        ent = ent.float().cpu()
        # split the samples into low/high-entropy clusters and reject the high-entropy ones as unknown
        initc = np.array([[0], [1]])
        kmeans = KMeans(n_clusters=2, random_state=0, init=initc, n_init=1).fit(ent.reshape(-1, 1))
        threshold = (kmeans.cluster_centers_).mean()
        predict[ent > threshold] = args.class_num
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        matrix = matrix[np.unique(all_label).astype(int), :]
        acc = matrix.diagonal() / matrix.sum(axis=1)
        unknown_acc = acc[-1:].item()
        return np.mean(acc), np.mean(acc[:-1])
    else:
        return accuracy, mean_ent
def cal_acc(loader, netF, netB, netC):
    k = 0
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            input_images = []
            inputs = data[0]
            inputs_clone = inputs.clone()
            for j in range(inputs_clone.size(0)):
                # undo the [-1, 1] normalization so the image can be saved for inspection
                x = transforms.Normalize((-1,), (2,))(inputs_clone[j])
                input_images.append(transforms.ToPILImage()(x))
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            # per-batch predictions are needed below to sort the inspection images
            _, predict = torch.max(outputs.float().cpu(), 1)
            for j in range(inputs.size(0)):
                folder = args.output_dir + '/inspect/label-{}'.format(labels[j])
                if not osp.exists(folder):
                    os.makedirs(folder)
                subfolder = folder + '/pred-{}'.format(predict[j])
                if not osp.exists(subfolder):
                    os.makedirs(subfolder)
                input_images[j].save(subfolder + '/{}.jpg'.format(k))
                k += 1
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    return accuracy * 100, mean_ent
def cal_acc_multi(loader, netF_list, netB_list, netC_list, netG_list, args):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for _ in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs_all = torch.zeros(len(args.src), inputs.shape[0], args.class_num)
            weights_all = torch.ones(inputs.shape[0], len(args.src))
            outputs_all_w = torch.zeros(inputs.shape[0], args.class_num)
            for i in range(len(args.src)):
                features = netB_list[i](netF_list[i](inputs))
                outputs = netC_list[i](features)
                weights = netG_list[i](features)
                outputs_all[i] = outputs
                weights_all[:, i] = weights.squeeze()
            # normalize the per-source weights so they sum to one for each sample
            z = torch.sum(weights_all, dim=1)
            z = z + 1e-16
            weights_all = torch.transpose(torch.transpose(weights_all, 0, 1) / z, 0, 1)
            print(weights_all.mean(dim=0))
            outputs_all = torch.transpose(outputs_all, 0, 1)
            for i in range(inputs.shape[0]):
                outputs_all_w[i] = torch.matmul(torch.transpose(outputs_all[i], 0, 1), weights_all[i])
            if start_test:
                all_output = outputs_all_w.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs_all_w.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    return accuracy * 100, mean_ent
def cal_acc(loader, netF, netH, netB, netC, args, flag=False):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[-1]
            inputs = inputs.cuda()
            labels = labels.cuda()
            # margin-based classifier heads need the labels at forward time
            if args.layer in ['add_margin', 'arc_margin', 'sphere']:
                labels_forward = labels
            else:
                labels_forward = None
            outputs = netC(netB(netF(inputs)), labels_forward)
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float().cpu()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float().cpu()), 0)
    all_output = nn.Softmax(dim=1)(all_output)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()
    if flag:
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        acc = matrix.diagonal() / matrix.sum(axis=1) * 100
        aacc = acc.mean()
        aa = [str(np.round(i, 2)) for i in acc]
        acc = ' '.join(aa)
        return aacc, acc
    else:
        return accuracy * 100, mean_ent
def maxent_step(inputs, netF, netH, netB, netC, optim, epsilon=1e-8):
    netF.train()
    netH.train()
    netB.train()
    optim.zero_grad()
    c3 = netC(netB(netF(inputs)), None)
    softmax_out = nn.Softmax(dim=1)(c3)
    msoftmax = softmax_out.mean(dim=0)
    # negated entropy of the marginal prediction: minimizing it maximizes class diversity
    gentropy = -torch.sum(-msoftmax * torch.log(msoftmax + epsilon))
    with torch.no_grad():
        entropy = torch.mean(loss.Entropy(softmax_out))
    gentropy.backward()
    optim.step()
    return entropy.item(), gentropy.item()
def cal_acc(loader, netF, netB, netC, visda_flag=False):
    start_test = True
    with torch.no_grad():
        iter_test = iter(loader)
        for i in range(len(loader)):
            data = next(iter_test)
            inputs = data[0]
            labels = data[1]
            inputs = inputs.cuda()
            outputs = netC(netB(netF(inputs)))
            if start_test:
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    accuracy *= 100
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
    per_cls_acc_vec = matrix.diagonal() / matrix.sum(axis=1) * 100
    per_cls_avg_acc = per_cls_acc_vec.mean()  # per-class average accuracy
    per_cls_acc_list = [str(np.round(i, 2)) for i in per_cls_acc_vec]
    acc_each_cls = ' '.join(per_cls_acc_list)
    if visda_flag:
        # For VisDA, return the accuracy of each class so it can be printed:
        # overall acc, acc of each class (str), per-class avg acc
        return accuracy, acc_each_cls, per_cls_avg_acc
    else:
        # For other datasets, there is no need to return the accuracy of each class:
        # overall acc, per-class avg acc, mean entropy
        return accuracy, per_cls_avg_acc, mean_ent
def train(args):
    ent_loss_record = []
    gent_loss_record = []
    sent_loss_record = []
    total_loss_record = []
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u':
        netF = network.LeNetBase()  # .cuda()
    elif args.dset == 'm':
        netF = network.LeNetBase()  # .cuda()
    elif args.dset == 's':
        netF = network.DTNBase()  # .cuda()
    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck)  # .cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck)  # .cuda()

    param_group = []
    learning_rate = args.lr
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    for k, v in netC.named_parameters():
        param_group += [{'params': v, 'lr': learning_rate}]
    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    acc_init = 0
    max_iter = args.max_epoch * len(dset_loaders["train"])
    interval_iter = max_iter // 10
    iter_num = 0

    netF.train()
    netB.train()
    netC.train()

    while iter_num < max_iter:
        try:
            inputs_source, strong_inputs, target = next(iter_source)
        except:
            iter_source = iter(dset_loaders["train"])
            inputs_source, strong_inputs, target = next(iter_source)
        if inputs_source.size(0) == 1:
            continue
        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_source = inputs_source  # .cuda()
        outputs_source = netC(netB(netF(inputs_source)))

        total_loss = torch.tensor(0.0)  # .cuda()
        softmax_out = nn.Softmax(dim=1)(outputs_source)
        if args.ent:
            ent_loss = torch.mean(loss.Entropy(softmax_out))
            total_loss += ent_loss
            ent_loss_record.append(ent_loss.detach().cpu())
        if args.gent:
            msoftmax = softmax_out.mean(dim=0)
            gent_loss = -torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            gent_loss_record.append(gent_loss.detach().cpu())
            total_loss += gent_loss
        if args.sent:
            sent_loss = compute_aug_loss(strong_inputs, target, netC, netB, netF)
            total_loss += sent_loss
            sent_loss_record.append(sent_loss.detach().cpu())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        total_loss_record.append(total_loss.detach().cpu())

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            print(iter_num, interval_iter, max_iter)
            # netF.eval()
            # netB.eval()
            # netC.eval()
            # acc_s_tr, _ = cal_acc(dset_loaders['train'], netF, netB, netC)
            # acc_s_te, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
            # log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%/ {:.2f}%'.format(args.dset, iter_num, max_iter, acc_s_tr, acc_s_te)
            # args.out_file.write(log_str + '\n')
            # args.out_file.flush()
            # print(log_str+'\n')
            # if acc_s_te >= acc_init:
            #     acc_init = acc_s_te
            #     best_netF = netF.state_dict()
            #     best_netB = netB.state_dict()
            #     best_netC = netC.state_dict()
            # netF.train()
            # netB.train()
            # netC.train()

    best_netF = netF.state_dict()
    best_netB = netB.state_dict()
    best_netC = netC.state_dict()
    torch.save(best_netF, osp.join(args.output_dir, "F.pt"))
    torch.save(best_netB, osp.join(args.output_dir, "B.pt"))
    torch.save(best_netC, osp.join(args.output_dir, "C.pt"))

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, sharex=True, figsize=(16, 8))
    ax1.plot(list(range(len(ent_loss_record))), ent_loss_record, 'r')
    ax2.plot(list(range(len(gent_loss_record))), gent_loss_record, 'g')
    ax3.plot(list(range(len(sent_loss_record))), sent_loss_record, 'b')
    ax4.plot(list(range(len(total_loss_record))), total_loss_record, 'm')
    plt.tight_layout()
    plt.savefig(args.output_dir + '/loss.png')
    return netF, netB, netC
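# The training loops above and below call op_copy(...) and lr_scheduler(...) to stash the
# initial learning rates and decay them with the iteration count. A minimal sketch,
# assuming the common polynomial-decay scheme; the exact hyper-parameters and any extra
# fields (momentum, weight decay) set by the real helpers are an assumption:
def op_copy(optimizer):
    for param_group in optimizer.param_groups:
        param_group['lr0'] = param_group['lr']  # remember the initial learning rate
    return optimizer

def lr_scheduler(optimizer, iter_num, max_iter, gamma=10, power=0.75):
    decay = (1 + gamma * iter_num / max_iter) ** (-power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr0'] * decay
    return optimizer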
def train(config):
    # set pre-process
    prep_config = config["prep"]
    prep_dict = {}
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    # prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = datasets.ImageFolder(data_config['source']['list_path'], transform=prep_dict["source"])
    dset_loaders['source'] = getdataloader(dsets['source'], batchsize=train_bs, num_workers=4,
                                           drop_last=True, weightsampler=True)
    dsets["target"] = datasets.ImageFolder(data_config['target']['list_path'], transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs, shuffle=True,
                                        num_workers=4, drop_last=True)
    if prep_config["test_10crop"]:
        dsets["test"] = [datasets.ImageFolder(data_config['test']['list_path'], transform=prep_dict["test"][i])
                         for i in range(10)]
        dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs, shuffle=False, num_workers=4)
                                for dset in dsets['test']]
    else:
        dsets["test"] = datasets.ImageFolder(data_config['test']['list_path'], transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs, shuffle=False, num_workers=4)
    class_num = config["network"]["params"]["class_num"]

    # set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()

    # set test_ad_net
    test_ad_net = network.AdversarialNetwork(base_network.output_num(), 1024, test_ad_net=True)
    test_ad_net = test_ad_net.cuda()

    # add additional networks for some methods
    if config['method'] == 'DANN':
        random_layer = None
        ad_net = network.AdversarialNetwork(base_network.output_num(), 1024)
    elif config['method'] == 'MADA':
        random_layer = None
        ad_net = network.AdversarialNetworkClassGroup(base_network.output_num(), 1024, class_num)
    elif config['method'] == 'proposed':
        if config['loss']['random']:
            random_layer = network.RandomLayer([base_network.output_num(), class_num], config['loss']['random_dim'])
            ad_net = network.AdversarialNetwork(config['loss']['random_dim'], 1024)
            ad_net_group = network.AdversarialNetworkGroup(config['loss']['random_dim'], 256, class_num,
                                                           config['center_threshold'])
        else:
            random_layer = None
            ad_net = network.AdversarialNetwork(base_network.output_num(), 1024)
            ad_net_group = network.AdversarialNetworkGroup(base_network.output_num(), 1024, class_num,
                                                           config['center_threshold'])
    elif config['method'] == 'base':
        pass
    else:
        if config["loss"]["random"]:
            random_layer = network.RandomLayer([base_network.output_num(), class_num], config["loss"]["random_dim"])
            ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
        else:
            random_layer = None
            ad_net = network.AdversarialNetwork(base_network.output_num() * class_num, 1024)
    if config["loss"]["random"] and config['method'] != 'base' and config['method'] != 'DANN' and config['method'] != 'MADA':
        random_layer.cuda()
    if config['method'] != 'base':
        ad_net = ad_net.cuda()
    if config['method'] == 'proposed':
        ad_net_group = ad_net_group.cuda()

    # set parameters
    if config['method'] == 'proposed':
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters() + ad_net.get_parameters() + ad_net_group.get_parameters()
    elif config['method'] == 'base':
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters()
    elif config['method'] == 'MADA':
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters() + ad_net.get_parameters()
    else:
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters() + ad_net.get_parameters()

    # set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    # parallel
    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        base_network = nn.DataParallel(base_network)
        test_ad_net = nn.DataParallel(test_ad_net)
        if config['method'] == 'DANN':
            ad_net = nn.DataParallel(ad_net)
        elif config['method'] == 'proposed':
            if config['loss']['random']:
                random_layer = nn.DataParallel(random_layer)
                ad_net = nn.DataParallel(ad_net)
                # Wrapping ad_net_group in DataParallel raises an error, probably because
                # its output is not a tensor, which DataParallel does not support yet.
                # ad_net_group = nn.DataParallel(ad_net_group)
            else:
                ad_net = nn.DataParallel(ad_net)
                # ad_net_group = nn.DataParallel(ad_net_group)
        elif config['method'] == 'base':
            pass
        else:
            # CDAN+E
            if config["loss"]["random"]:
                random_layer = nn.DataParallel(random_layer)
                ad_net = nn.DataParallel(ad_net)
            # CDAN
            else:
                ad_net = nn.DataParallel(ad_net)

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.train(False)  # eval() is equivalent to train(False)
            temp_acc = image_classification_test(dset_loaders, base_network, test_10crop=prep_config["test_10crop"])
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
        # if i % config["snapshot_interval"] == 0:
        #     torch.save(nn.Sequential(base_network), osp.join(config["output_path"],
        #                "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]
        # train one iter
        base_network.train(True)
        if config['method'] != 'base':
            ad_net.train(True)
        if config['method'] == 'proposed':
            ad_net_group.train(True)
        # lr_scheduler
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = next(iter_source)
        inputs_target, labels_target = next(iter_target)
        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)

        if config['tsne']:
            # feature visualization using t-SNE
            if i == int(0.98 * config['num_iterations']):
                features_source_total = features_source.cpu().detach().numpy()
                features_target_total = features_target.cpu().detach().numpy()
            elif i > int(0.98 * config['num_iterations']) and i < int(0.98 * config['num_iterations']) + 10:
                features_source_total = np.concatenate((features_source_total, features_source.cpu().detach().numpy()))
                features_target_total = np.concatenate((features_target_total, features_target.cpu().detach().numpy()))
            elif i == int(0.98 * config['num_iterations']) + 10:
                for index in range(config['tsne_num']):
                    features_embeded = TSNE(perplexity=10, n_iter=5000).fit_transform(
                        np.concatenate((features_source_total, features_target_total)))
                    fig = plt.figure()
                    plt.scatter(features_embeded[:len(features_embeded) // 2, 0],
                                features_embeded[:len(features_embeded) // 2, 1], c='r', s=1)
                    plt.scatter(features_embeded[len(features_embeded) // 2:, 0],
                                features_embeded[len(features_embeded) // 2:, 1], c='b', s=1)
                    plt.savefig(osp.join(config["output_path"], config['method'] + '-' + str(index) + '.png'))
                    plt.close()
            else:
                pass

        assert features_source.size(0) == features_target.size(0), 'The batch sizes must be the same'
        assert outputs_source.size(0) == outputs_target.size(0), 'The batch sizes must be the same'
        # source first, target second
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)

        # output the A-distance
        if i % config["test_interval"] == config["test_interval"] - 1:
            A_distance = cal_A_distance(test_ad_net, features)
            config['A_distance_file'].write(str(A_distance) + '\n')
            config['A_distance_file'].flush()

        softmax_out = nn.Softmax(dim=1)(outputs)
        if config['method'] == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy, network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        elif config['method'] == 'MADA':
            transfer_loss = loss.MADA(features, softmax_out, ad_net)
        elif config['method'] == 'proposed':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.proposed([features, outputs], labels_source, ad_net, ad_net_group, entropy,
                                          network.calc_coeff(i), i, random_layer, config['loss']['trade_off23'])
        elif config['method'] == 'base':
            pass
        else:
            raise ValueError('Method cannot be recognized.')

        test_domain_loss = loss.DANN(features.clone().detach(), test_ad_net)
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        if config['method'] == 'base':
            total_loss = classifier_loss + test_domain_loss
        else:
            total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss + test_domain_loss
        total_loss.backward()
        optimizer.step()
    # torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
def train_target(args):
    dset_loaders = data_load(args)
    ## set base network
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_B.pt'
    netB.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(torch.load(modelpath))
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False
    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    max_iter = args.max_epoch * len(dset_loaders["target"])
    interval_iter = max_iter // args.interval
    iter_num = 0
    iter_sw = int(max_iter / 2.0)

    while iter_num < max_iter:
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except:
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = next(iter_test)
        if inputs_test.size(0) == 1:
            continue

        if iter_num % interval_iter == 0 and args.cls_par > 0:
            netF.eval()
            netB.eval()
            mem_label_soft, mtx_infor_nh, feas_FC = obtain_label(dset_loaders['test'], netF, netB, netC,
                                                                 args, iter_num, iter_sw)
            mem_label_soft = torch.from_numpy(mem_label_soft).cuda()
            feas_all = feas_FC[0]
            ops_all = feas_FC[1]
            netF.train()
            netB.train()

        inputs_test = inputs_test.cuda()
        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        # mix each sample's own feature with the aggregated feature of its neighbours
        features_F_self = netF(inputs_test)
        features_F_nh = get_mtx_sam_wgt_nh(feas_all, mtx_infor_nh, tar_idx)
        features_F_nh = features_F_nh.cuda()
        features_F_mix = 0.8 * features_F_self + 0.2 * features_F_nh
        outputs_test_mix = netC(netB(features_F_mix))
        ops_test_self = netC(netB(features_F_self))
        outputs_test_nh = netC(netB(features_F_nh))

        if args.cls_par > 0:
            log_probs = nn.LogSoftmax(dim=1)(outputs_test_mix)
            targets = mem_label_soft[tar_idx]
            loss_soft = (-targets * log_probs).sum(dim=1)
            classifier_loss = loss_soft.mean()
            classifier_loss *= args.cls_par
            if iter_num < interval_iter and args.dset == "VISDA-C":
                classifier_loss *= 0
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(outputs_test_mix)
            entropy_loss = torch.mean(loss.Entropy(softmax_out))
            if args.gent:
                msoftmax = softmax_out.mean(dim=0)
                gentropy_loss = torch.sum(-msoftmax * torch.log(msoftmax + args.epsilon))
                entropy_loss -= gentropy_loss
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            if args.dset == 'VISDA-C':
                acc_s_te, acc_list = cal_acc(dset_loaders['test'], netF, netB, netC, True)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                    args.name, iter_num, max_iter, acc_s_te) + '\n' + acc_list
            else:
                acc_s_te, _ = cal_acc(dset_loaders['test'], netF, netB, netC, False)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                    args.name, iter_num, max_iter, acc_s_te)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(netF.state_dict(), osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(netB.state_dict(), osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(netC.state_dict(), osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
def train_target(args):
    dset_loaders = data_load(args)
    ## set base network
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    if not args.ssl == 0:
        netR = network.feat_classifier(type='linear', class_num=4, bottleneck_dim=2 * args.bottleneck).cuda()
        netR_dict, acc_rot = train_target_rot(args)
        netR.load_state_dict(netR_dict)

    modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_B.pt'
    netB.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(torch.load(modelpath))
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False
    if not args.ssl == 0:
        for k, v in netR.named_parameters():
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        netR.train()
    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    max_iter = args.max_epoch * len(dset_loaders["target"])
    interval_iter = max_iter // args.interval
    iter_num = 0

    while iter_num < max_iter:
        optimizer.zero_grad()
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except:
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = next(iter_test)
        if inputs_test.size(0) == 1:
            continue

        if iter_num % interval_iter == 0 and args.cls_par > 0:
            netF.eval()
            netB.eval()
            mem_label = obtain_label(dset_loaders['target_te'], netF, netB, netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        inputs_test = inputs_test.cuda()
        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        if args.cls_par > 0:
            pred = mem_label[tar_idx]
        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)

        if args.cls_par > 0:
            classifier_loss = nn.CrossEntropyLoss()(outputs_test, pred)
            classifier_loss *= args.cls_par
            if iter_num < interval_iter and args.dset == "VISDA-C":
                classifier_loss *= 0
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(outputs_test)
            entropy_loss = torch.mean(loss.Entropy(softmax_out))
            if args.gent:
                msoftmax = softmax_out.mean(dim=0)
                gentropy_loss = torch.sum(-msoftmax * torch.log(msoftmax + args.epsilon))
                entropy_loss -= gentropy_loss
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss
        classifier_loss.backward()

        if not args.ssl == 0:
            # self-supervised rotation prediction on the target data
            r_labels_target = np.random.randint(0, 4, len(inputs_test))
            r_inputs_target = rotation.rotate_batch_with_labels(inputs_test, r_labels_target)
            r_labels_target = torch.from_numpy(r_labels_target).cuda()
            r_inputs_target = r_inputs_target.cuda()
            f_outputs = netB(netF(inputs_test))
            f_outputs = f_outputs.detach()
            f_r_outputs = netB(netF(r_inputs_target))
            r_outputs_target = netR(torch.cat((f_outputs, f_r_outputs), 1))
            rotation_loss = args.ssl * nn.CrossEntropyLoss()(r_outputs_target, r_labels_target)
            rotation_loss.backward()

        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            if args.dset == 'VISDA-C':
                acc_s_te, acc_list = cal_acc(dset_loaders['test'], netF, netB, netC, True)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                    args.name, iter_num, max_iter, acc_s_te) + '\n' + acc_list
            else:
                acc_s_te, _ = cal_acc(dset_loaders['test'], netF, netB, netC, False)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                    args.name, iter_num, max_iter, acc_s_te)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(netF.state_dict(), osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(netB.state_dict(), osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(netC.state_dict(), osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
def train_target(args):
    dset_loaders = data_load(args)
    ## set base network
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir_src + '/source_B.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(torch.load(args.modelpath))
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False
    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    tt = 0
    iter_num = 0
    max_iter = args.max_epoch * len(dset_loaders["target"])
    interval_iter = max_iter // args.interval

    while iter_num < max_iter:
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except:
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = next(iter_test)
        if inputs_test.size(0) == 1:
            continue

        if iter_num % interval_iter == 0:
            netF.eval()
            netB.eval()
            mem_label, ENT_THRESHOLD = obtain_label(dset_loaders['test'], netF, netB, netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        inputs_test = inputs_test.cuda()
        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        pred = mem_label[tar_idx]
        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)
        softmax_out = nn.Softmax(dim=1)(outputs_test)
        # only samples pseudo-labelled as known classes contribute to the classification loss
        outputs_test_known = outputs_test[pred < args.class_num, :]
        pred = pred[pred < args.class_num]
        if len(pred) == 0:
            print(tt)
            del features_test
            del outputs_test
            tt += 1
            continue

        if args.cls_par > 0:
            classifier_loss = nn.CrossEntropyLoss()(outputs_test_known, pred)
            classifier_loss *= args.cls_par
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out_known = nn.Softmax(dim=1)(outputs_test_known)
            entropy_loss = torch.mean(loss.Entropy(softmax_out_known))
            if args.gent:
                msoftmax = softmax_out.mean(dim=0)
                gentropy_loss = torch.sum(-msoftmax * torch.log(msoftmax + args.epsilon))
                entropy_loss -= gentropy_loss
            classifier_loss += entropy_loss * args.ent_par

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            acc_os1, acc_os2, acc_unknown = cal_acc(dset_loaders['test'], netF, netB, netC, True, ENT_THRESHOLD)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}% / {:.2f}% / {:.2f}%'.format(
                args.name, iter_num, max_iter, acc_os2, acc_os1, acc_unknown)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(netF.state_dict(), osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(netB.state_dict(), osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(netC.state_dict(), osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
def train(args, model, ad_net, random_layer, train_loader, train_loader1, optimizer, optimizer_ad,
          epoch, start_epoch, method, D_s, D_t, G_s2t, G_t2s, criterion_Sem, criterion_GAN,
          criterion_cycle, criterion_identity, optimizer_G, optimizer_D_t, optimizer_D_s,
          classifier1, classifier1_optim, fake_S_buffer, fake_T_buffer):
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    if len_source > len_target:
        num_iter = len_source
    else:
        num_iter = len_target
    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        data_source, label_source = next(iter_source)
        # data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, label_target = next(iter_target)
        # data_target = data_target.cuda()
        optimizer.zero_grad()
        optimizer_ad.zero_grad()
        features_source, outputs_source = model(data_source)
        features_target, outputs_target = model(data_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        loss = nn.CrossEntropyLoss()(outputs.narrow(0, 0, data_source.size(0)), label_source)
        softmax_output = nn.Softmax(dim=1)(outputs)
        output1 = classifier1(features)
        softmax_output1 = nn.Softmax(dim=1)(output1)
        softmax_output = (1 - args.cla_plus_weight) * softmax_output + args.cla_plus_weight * softmax_output1
        if epoch > start_epoch:
            if method == 'CDAN-E':
                entropy = loss_func.Entropy(softmax_output)
                loss += loss_func.CDAN([features, softmax_output], ad_net, entropy,
                                       network.calc_coeff(num_iter * (epoch - start_epoch) + batch_idx), random_layer)
            elif method == 'CDAN':
                loss += loss_func.CDAN([features, softmax_output], ad_net, None, None, random_layer)
            elif method == 'DANN':
                loss += loss_func.DANN(features, ad_net)
            else:
                raise ValueError('Method cannot be recognized.')

        # Cycle
        num_feature = features.size(0)
        # ================= train discriminator T
        real_label = Variable(torch.ones(num_feature))
        # real_label = Variable(torch.ones(num_feature)).cuda()
        fake_label = Variable(torch.zeros(num_feature))
        # fake_label = Variable(torch.zeros(num_feature)).cuda()

        # train the generators
        optimizer_G.zero_grad()
        # Identity loss
        same_t = G_s2t(features_target)
        loss_identity_t = criterion_identity(same_t, features_target)
        same_s = G_t2s(features_source)
        loss_identity_s = criterion_identity(same_s, features_source)
        # GAN loss
        fake_t = G_s2t(features_source)
        pred_fake = D_t(fake_t)
        loss_G_s2t = criterion_GAN(pred_fake, label_source.float())
        fake_s = G_t2s(features_target)
        pred_fake = D_s(fake_s)
        loss_G_t2s = criterion_GAN(pred_fake, label_source.float())
        # cycle loss
        recovered_s = G_t2s(fake_t)
        loss_cycle_sts = criterion_cycle(recovered_s, features_source)
        recovered_t = G_s2t(fake_s)
        loss_cycle_tst = criterion_cycle(recovered_t, features_target)
        # semantic loss
        pred_recovered_s = model.classifier(recovered_s)
        pred_fake_t = model.classifier(fake_t)
        loss_sem_t2s = criterion_Sem(pred_recovered_s, pred_fake_t)
        pred_recovered_t = model.classifier(recovered_t)
        pred_fake_s = model.classifier(fake_s)
        loss_sem_s2t = criterion_Sem(pred_recovered_t, pred_fake_s)
        loss_cycle = loss_cycle_tst + loss_cycle_sts
        weight_in_loss_g = args.weight_in_loss_g.split(',')
        loss_G = float(weight_in_loss_g[0]) * (loss_identity_s + loss_identity_t) + \
                 float(weight_in_loss_g[1]) * (loss_G_s2t + loss_G_t2s) + \
                 float(weight_in_loss_g[2]) * loss_cycle + \
                 float(weight_in_loss_g[3]) * (loss_sem_s2t + loss_sem_t2s)

        # train the auxiliary softmax classifier
        outputs_fake = classifier1(fake_t.detach())
        # classifier update
        classifier_loss1 = nn.CrossEntropyLoss()(outputs_fake, label_source)
        classifier1_optim.zero_grad()
        classifier_loss1.backward()
        classifier1_optim.step()

        total_loss = loss + args.cyc_loss_weight * loss_G
        total_loss.backward()
        optimizer.step()
        optimizer_G.step()

        ###### Discriminator S ######
        optimizer_D_s.zero_grad()
        # Real loss
        pred_real = D_s(features_source.detach())
        loss_D_real = criterion_GAN(pred_real, real_label)
        # Fake loss
        fake_s = fake_S_buffer.push_and_pop(fake_s)
        pred_fake = D_s(fake_s.detach())
        loss_D_fake = criterion_GAN(pred_fake, fake_label)
        # Total loss
        loss_D_s = loss_D_real + loss_D_fake
        loss_D_s.backward()
        optimizer_D_s.step()
        ###################################

        ###### Discriminator T ######
        optimizer_D_t.zero_grad()
        # Real loss
        pred_real = D_t(features_target.detach())
        loss_D_real = criterion_GAN(pred_real, real_label)
        # Fake loss
        fake_t = fake_T_buffer.push_and_pop(fake_t)
        pred_fake = D_t(fake_t.detach())
        loss_D_fake = criterion_GAN(pred_fake, fake_label)
        # Total loss
        loss_D_t = loss_D_real + loss_D_fake
        loss_D_t.backward()
        optimizer_D_t.step()

        if epoch > start_epoch:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLoss+G: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item(), total_loss.item()))
def train(config):
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    tensor_writer = SummaryWriter(config["tensorboard_path"])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(open(data_config["source"]["list_path"]).readlines(),
                                transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs,
                                        shuffle=True, num_workers=4, drop_last=True)
    dsets["target"] = ImageList(open(data_config["target"]["list_path"]).readlines(),
                                transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs,
                                        shuffle=True, num_workers=4, drop_last=True)

    if prep_config["test_10crop"]:
        for i in range(10):
            dsets["test"] = [ImageList(open(data_config["test"]["list_path"]).readlines(),
                                       transform=prep_dict["test"][i]) for i in range(10)]
            dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs,
                                               shuffle=False, num_workers=4) for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(open(data_config["test"]["list_path"]).readlines(),
                                  transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs,
                                          shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()
    parameter_list = base_network.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        if i % config["test_interval"] == config["test_interval"] - 1 or i == 0:
            base_network.train(False)
            temp_acc, output, prediction, label, feature = image_classification_test(
                dset_loaders, base_network, test_10crop=prep_config["test_10crop"])
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network),
                       osp.join(config["output_path"], "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]
        ## train one iter
        base_network.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = iter_source.next()
        inputs_target, labels_target = iter_target.next()
        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()

        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)

        outputs_target_temp = outputs_target / config['temperature']
        target_softmax_out_temp = nn.Softmax(dim=1)(outputs_target_temp)
        target_entropy_weight = loss.Entropy(target_softmax_out_temp).detach()
        target_entropy_weight = 1 + torch.exp(-target_entropy_weight)
        target_entropy_weight = train_bs * target_entropy_weight / torch.sum(target_entropy_weight)
        cov_matrix_t = target_softmax_out_temp.mul(
            target_entropy_weight.view(-1, 1)).transpose(1, 0).mm(target_softmax_out_temp)
        cov_matrix_t = cov_matrix_t / torch.sum(cov_matrix_t, dim=1)
        mcc_loss = (torch.sum(cov_matrix_t) - torch.trace(cov_matrix_t)) / class_num

        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        total_loss = classifier_loss + mcc_loss
        total_loss.backward()
        optimizer.step()

        tensor_writer.add_scalar('total_loss', total_loss, i)
        tensor_writer.add_scalar('classifier_loss', classifier_loss, i)
        tensor_writer.add_scalar('cov_matrix_penalty', mcc_loss, i)

    torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
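# The entropy-weighted class-confusion penalty computed inline in the loop above,
# pulled out as a standalone helper for readability. This is a minimal sketch of
# the same computation; the helper name `class_confusion_loss` and the explicit
# entropy expression (the loop uses loss.Entropy) are illustrative, not part of
# the original code.
import torch
import torch.nn as nn

def class_confusion_loss(target_logits, temperature, class_num):
    batch_size = target_logits.size(0)
    probs = nn.Softmax(dim=1)(target_logits / temperature)                # [B, C] temperature-scaled predictions
    entropy = -torch.sum(probs * torch.log(probs + 1e-5), dim=1)          # per-sample entropy
    weight = 1 + torch.exp(-entropy.detach())                             # confident samples get larger weight
    weight = batch_size * weight / torch.sum(weight)                      # weights sum to the batch size
    confusion = probs.mul(weight.view(-1, 1)).transpose(1, 0).mm(probs)   # [C, C] weighted class correlation
    confusion = confusion / torch.sum(confusion, dim=1)                   # normalise, same expression as the loop above
    return (torch.sum(confusion) - torch.trace(confusion)) / class_num    # off-diagonal (between-class) mass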
def train_target(args, zz=''):
    dset_loaders = data_load(args)
    ## set base network
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir_src + '/source_F_' + str(zz) + '.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir_src + '/source_B_' + str(zz) + '.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir_src + '/source_C_' + str(zz) + '.pt'
    netC.load_state_dict(torch.load(args.modelpath))
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False
    optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4, nesterov=True)

    for epoch in tqdm(range(args.max_epoch), leave=False):
        netF.eval()
        netB.eval()
        mem_label = obtain_label(dset_loaders['test'], netF, netB, netC, args)
        mem_label = torch.from_numpy(mem_label).cuda()
        netF.train()
        netB.train()

        iter_test = iter(dset_loaders['target'])
        for _, (inputs_test, _, tar_idx) in tqdm(enumerate(iter_test), leave=False):
            if inputs_test.size(0) == 1:
                continue
            inputs_test = inputs_test.cuda()
            pred = mem_label[tar_idx]
            features_test = netB(netF(inputs_test))
            outputs_test = netC(features_test)

            classifier_loss = loss.CrossEntropyLabelSmooth(
                num_classes=args.class_num, epsilon=0)(outputs_test, pred)
            classifier_loss *= args.cls_par

            if args.ent:
                softmax_out = nn.Softmax(dim=1)(outputs_test)
                entropy_loss = torch.mean(loss.Entropy(softmax_out))
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    gentropy_loss = torch.sum(-msoftmax * torch.log(msoftmax + args.epsilon))
                    entropy_loss -= gentropy_loss
                classifier_loss += entropy_loss * args.ent_par

            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        netF.eval()
        netB.eval()
        acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
            args.name, epoch + 1, args.max_epoch, acc * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

    if args.issave:
        torch.save(netF.state_dict(),
                   osp.join(args.output_dir, 'target_F_' + args.savename + '.pt'))
        torch.save(netB.state_dict(),
                   osp.join(args.output_dir, 'target_B_' + args.savename + '.pt'))
        torch.save(netC.state_dict(),
                   osp.join(args.output_dir, 'target_C_' + args.savename + '.pt'))

    return netF, netB, netC
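# The entropy / diversity regulariser applied in the adaptation loop above, extracted
# into a standalone helper as a minimal sketch. The helper name
# `information_maximization_loss` and its epsilon default are illustrative; the loop
# itself uses loss.Entropy together with args.epsilon.
import torch
import torch.nn as nn

def information_maximization_loss(outputs, epsilon=1e-5, use_diversity=True):
    softmax_out = nn.Softmax(dim=1)(outputs)
    # conditional entropy: push each target prediction towards a confident one-hot
    ent = torch.mean(torch.sum(-softmax_out * torch.log(softmax_out + epsilon), dim=1))
    if use_diversity:
        # marginal entropy: keep the predicted label distribution spread over all classes
        msoftmax = softmax_out.mean(dim=0)
        ent -= torch.sum(-msoftmax * torch.log(msoftmax + epsilon))
    return ent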
def train(config):
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(open(data_config["source"]["list_path"]).readlines(),
                                transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs,
                                        shuffle=True, num_workers=0, drop_last=True)
    dsets["target"] = ImageList(open(data_config["target"]["list_path"]).readlines(),
                                transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs,
                                        shuffle=True, num_workers=0, drop_last=True)

    if prep_config["test_10crop"]:
        for i in range(10):
            dsets["test"] = [ImageList(open(data_config["test"]["list_path"]).readlines(),
                                       transform=prep_dict["test"][i]) for i in range(10)]
            dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs,
                                               shuffle=False, num_workers=0) for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(open(data_config["test"]["list_path"]).readlines(),
                                  transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs,
                                          shuffle=False, num_workers=0)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    # base_network = base_network.cuda()

    ## add discriminators D_s, D_t and generators G_s2t, G_t2s
    z_dimension = 256
    D_s = network.models["Discriminator"]()
    # D_s = D_s.cuda()
    G_s2t = network.models["Generator"](z_dimension, 1024)
    # G_s2t = G_s2t.cuda()
    D_t = network.models["Discriminator"]()
    # D_t = D_t.cuda()
    G_t2s = network.models["Generator"](z_dimension, 1024)
    # G_t2s = G_t2s.cuda()

    criterion_GAN = torch.nn.MSELoss()
    criterion_cycle = torch.nn.L1Loss()
    criterion_identity = torch.nn.L1Loss()
    criterion_Sem = torch.nn.L1Loss()

    optimizer_G = torch.optim.Adam(itertools.chain(G_s2t.parameters(), G_t2s.parameters()), lr=0.0003)
    optimizer_D_s = torch.optim.Adam(D_s.parameters(), lr=0.0003)
    optimizer_D_t = torch.optim.Adam(D_t.parameters(), lr=0.0003)

    fake_S_buffer = ReplayBuffer()
    fake_T_buffer = ReplayBuffer()

    classifier_optimizer = torch.optim.Adam(base_network.parameters(), lr=0.0003)

    ## add an auxiliary classifier
    classifier1 = net.Net(256, class_num)
    # classifier1 = classifier1.cuda()
    classifier1_optim = optim.Adam(classifier1.parameters(), lr=0.0003)

    ## add additional network for some methods
    if config["loss"]["random"]:
        random_layer = network.RandomLayer([base_network.output_num(), class_num],
                                           config["loss"]["random_dim"])
        ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(base_network.output_num() * class_num, 1024)
    if config["loss"]["random"]:
        random_layer.cuda()
    # ad_net = ad_net.cuda()
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.train(False)
            temp_acc = image_classification_test(dset_loaders, base_network,
                                                 test_10crop=prep_config["test_10crop"])
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
                now = datetime.datetime.now()
                d = str(now.month) + '-' + str(now.day) + ' ' + str(now.hour) + ':' + str(now.minute) + ":" + str(now.second)
                torch.save(best_model,
                           osp.join(config["output_path"],
                                    "{}_to_{}_best_model_acc-{}_{}.pth.tar".format(args.source, args.target, best_acc, d)))
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network),
                       osp.join(config["output_path"],
                                "{}_to_{}_iter_{:05d}_model_{}.pth.tar".format(args.source, args.target, i,
                                                                               str(datetime.datetime.utcnow()))))

        print("it_train: {:05d} / {:05d} start".format(i, config["num_iterations"]))
        loss_params = config["loss"]

        ## train one iter
        classifier1.train(True)
        base_network.train(True)
        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = iter_source.next()
        inputs_target, labels_target = iter_target.next()
        # inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()

        # extract features
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out = nn.Softmax(dim=1)(outputs)

        outputs_source1 = classifier1(features_source.detach())
        outputs_target1 = classifier1(features_target.detach())
        outputs1 = torch.cat((outputs_source1, outputs_target1), dim=0)
        softmax_out1 = nn.Softmax(dim=1)(outputs1)
        softmax_out = (1 - args.cla_plus_weight) * softmax_out + args.cla_plus_weight * softmax_out1

        if config['method'] == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        else:
            raise ValueError('Method cannot be recognized.')
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)

        # Cycle
        num_feature = features_source.size(0)
        # =================train discriminator T
        real_label = Variable(torch.ones(num_feature))
        # real_label = Variable(torch.ones(num_feature)).cuda()
        fake_label = Variable(torch.zeros(num_feature))
        # fake_label = Variable(torch.zeros(num_feature)).cuda()

        # train the generators
        optimizer_G.zero_grad()

        # Identity loss
        same_t = G_s2t(features_target.detach())
        loss_identity_t = criterion_identity(same_t, features_target)
        same_s = G_t2s(features_source.detach())
        loss_identity_s = criterion_identity(same_s, features_source)

        # Gan loss
        fake_t = G_s2t(features_source.detach())
        pred_fake = D_t(fake_t)
        loss_G_s2t = criterion_GAN(pred_fake, labels_source.float())
        fake_s = G_t2s(features_target.detach())
        pred_fake = D_s(fake_s)
        loss_G_t2s = criterion_GAN(pred_fake, labels_source.float())

        # cycle loss
        recovered_s = G_t2s(fake_t)
        loss_cycle_sts = criterion_cycle(recovered_s, features_source)
        recovered_t = G_s2t(fake_s)
        loss_cycle_tst = criterion_cycle(recovered_t, features_target)

        # sem loss
        pred_recovered_s = base_network.fc(recovered_s)
        pred_fake_t = base_network.fc(fake_t)
        loss_sem_t2s = criterion_Sem(pred_recovered_s, pred_fake_t)
        pred_recovered_t = base_network.fc(recovered_t)
        pred_fake_s = base_network.fc(fake_s)
        loss_sem_s2t = criterion_Sem(pred_recovered_t, pred_fake_s)

        loss_cycle = loss_cycle_tst + loss_cycle_sts
        weights = args.weight_in_lossG.split(',')
        loss_G = float(weights[0]) * (loss_identity_s + loss_identity_t) + \
                 float(weights[1]) * (loss_G_s2t + loss_G_t2s) + \
                 float(weights[2]) * loss_cycle + \
                 float(weights[3]) * (loss_sem_s2t + loss_sem_t2s)

        # train the softmax classifier
        outputs_fake = classifier1(fake_t.detach())
        # classifier update
        classifier_loss1 = nn.CrossEntropyLoss()(outputs_fake, labels_source)
        classifier1_optim.zero_grad()
        classifier_loss1.backward()
        classifier1_optim.step()

        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss + args.cyc_loss_weight * loss_G
        total_loss.backward()
        optimizer.step()
        optimizer_G.step()

        ###### Discriminator S ######
        optimizer_D_s.zero_grad()
        # Real loss
        pred_real = D_s(features_source.detach())
        loss_D_real = criterion_GAN(pred_real, real_label)
        # Fake loss
        fake_s = fake_S_buffer.push_and_pop(fake_s)
        pred_fake = D_s(fake_s.detach())
        loss_D_fake = criterion_GAN(pred_fake, fake_label)
        # Total loss
        loss_D_s = loss_D_real + loss_D_fake
        loss_D_s.backward()
        optimizer_D_s.step()
        ###################################

        ###### Discriminator t ######
        optimizer_D_t.zero_grad()
        # Real loss
        pred_real = D_t(features_target.detach())
        loss_D_real = criterion_GAN(pred_real, real_label)
        # Fake loss
        fake_t = fake_T_buffer.push_and_pop(fake_t)
        pred_fake = D_t(fake_t.detach())
        loss_D_fake = criterion_GAN(pred_fake, fake_label)
        # Total loss
        loss_D_t = loss_D_real + loss_D_fake
        loss_D_t.backward()
        optimizer_D_t.step()

        print("it_train: {:05d} / {:05d} over".format(i, config["num_iterations"]))

    now = datetime.datetime.now()
    d = str(now.month) + '-' + str(now.day) + ' ' + str(now.hour) + ':' + str(now.minute) + ":" + str(now.second)
    torch.save(best_model,
               osp.join(config["output_path"],
                        "{}_to_{}_best_model_acc-{}_{}.pth.tar".format(args.source, args.target, best_acc, d)))
    return best_acc
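# ReplayBuffer is used above via push_and_pop() but is not defined in this file.
# A minimal sketch under the assumption that it is the usual CycleGAN-style replay
# buffer (store up to max_size fake samples and sometimes return an older one in
# place of the current batch element); the repo's own implementation may differ.
import random
import torch
from torch.autograd import Variable

class ReplayBuffer:
    def __init__(self, max_size=50):
        self.max_size = max_size
        self.data = []

    def push_and_pop(self, data):
        to_return = []
        for element in data.data:
            element = torch.unsqueeze(element, 0)
            if len(self.data) < self.max_size:
                # buffer not full yet: store the new element and return it as-is
                self.data.append(element)
                to_return.append(element)
            elif random.uniform(0, 1) > 0.5:
                # half of the time, swap in the new element and return an old one
                i = random.randint(0, self.max_size - 1)
                to_return.append(self.data[i].clone())
                self.data[i] = element
            else:
                to_return.append(element)
        return Variable(torch.cat(to_return))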
def train(config):
    ####################################################
    # Tensorboard setting
    ####################################################
    #tensor_writer = SummaryWriter(config["tensorboard_path"])

    ####################################################
    # Data setting
    ####################################################
    prep_dict = {}  # data pre-processing transforms
    prep_dict["source"] = prep.image_train(**config['prep']['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    prep_dict["test"] = prep.image_test(**config['prep']['params'])

    dsets = {}
    dsets["source"] = datasets.ImageFolder(config['s_dset_path'], transform=prep_dict["source"])
    dsets["target"] = datasets.ImageFolder(config['t_dset_path'], transform=prep_dict['target'])
    dsets['test'] = datasets.ImageFolder(config['t_dset_path'], transform=prep_dict['test'])

    data_config = config["data"]
    train_source_bs = data_config["source"]["batch_size"]  # the original set both source and target to the source train batch size; fixed here
    train_target_bs = data_config['target']['batch_size']
    test_bs = data_config["test"]["batch_size"]

    dset_loaders = {}
    dset_loaders["source"] = DataLoader(
        dsets["source"], batch_size=train_source_bs, shuffle=True, num_workers=4, drop_last=True
    )  # the original uses drop_last=True so source and target keep yielding equally sized batches to the end
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_target_bs,
                                        shuffle=True, num_workers=4, drop_last=True)
    dset_loaders['test'] = DataLoader(dsets['test'], batch_size=test_bs,
                                      shuffle=False, num_workers=4, drop_last=False)

    ####################################################
    # Network Setting
    ####################################################
    class_num = config["network"]['params']['class_num']
    net_config = config["network"]
    """
    config['network'] = {'name': network.ResNetFc,
                         'params': {'resnet_name': args.net,
                                    'use_bottleneck': True,
                                    'bottleneck_dim': 256,
                                    'new_cls': True,
                                    'class_num': args.class_num,
                                    'type': args.type}
                         }
    """
    base_network = net_config["name"](**net_config["params"])  # instantiate the ResNetFc() class defined in network.py
    base_network = base_network.cuda()  # ResNetFc(Resnet, True, 256, True, 12)

    if config["loss"]["random"]:
        random_layer = network.RandomLayer(
            [base_network.output_num(), class_num], config["loss"]["random_dim"])
        random_layer.cuda()
        ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(
            base_network.output_num() * class_num, 1024)  # why multiply by the number of classes?
    ad_net = ad_net.cuda()
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ####################################################
    # Env Setting
    ####################################################
    #gpus = config['gpu'].split(',')
    #if len(gpus) > 1 :
    #    ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
    #    base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    ####################################################
    # Optimizer Setting
    ####################################################
    optimizer_config = config['optimizer']
    optimizer = optimizer_config["type"](parameter_list, **(optimizer_config["optim_params"]))  # optim.SGD
    #config['optimizer'] = {'type': optim.SGD,
    #                       'optim_params': {'lr': args.lr,
    #                                        'momentum': 0.9,
    #                                        'weight_decay': 0.0005,
    #                                        'nestrov': True},
    #                       'lr_type': "inv",
    #                       'lr_param': {"lr": args.lr,
    #                                    'gamma': 0.001,  # shouldn't this be 0.01?
    #                                    'power': 0.75
    #}
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group['lr'])
    schedule_param = optimizer_config['lr_param']
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]  # return optimizer

    ####################################################
    # Train
    ####################################################
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = 0.0
    classifier_loss_value = 0.0
    total_loss_value = 0.0
    best_acc = 0.0
    batch_size = config["data"]["source"]["batch_size"]

    for i in range(config["num_iterations"]):  # num_iterations batches are used for training
        sys.stdout.write("Iteration : {} \r".format(i))
        sys.stdout.flush()

        loss_params = config["loss"]
        base_network.train(True)
        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = iter_source.next()
        inputs_target, labels_target = iter_target.next()
        inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
        inputs_target = inputs_target.cuda()

        inputs = torch.cat((inputs_source, inputs_target), dim=0)
        features, outputs, tau, cur_mean_source, cur_mean_target, output_mean_source, output_mean_target = base_network(inputs)
        softmax_out = nn.Softmax(dim=1)(outputs)
        outputs_source = outputs[:batch_size]
        outputs_target = outputs[batch_size:]

        if config['method'] == 'CDAN+E' or config['method'] == 'CDAN_TransNorm':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            pass  # tidy up later
        else:
            raise ValueError('Method cannot be recognized')
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)

        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss
        total_loss.backward()
        optimizer.step()
        #tensor_writer.add_scalar('total_loss', total_loss.i )
        #tensor_writer.add_scalar('classifier_loss', classifier_loss, i)
        #tensor_writer.add_scalar('transfer_loss', transfer_loss, i)

        ####################################################
        # Test
        ####################################################
        if i % config["test_interval"] == config["test_interval"] - 1:  # every test_interval iterations
            base_network.train(False)
            temp_acc = image_classification_test(dset_loaders, base_network)
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
                ACC = round(best_acc, 2) * 100
                torch.save(best_model,
                           os.path.join(config["output_path"], "iter_{}_model.pth.tar".format(ACC)))
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
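# lr_scheduler above is looked up from lr_schedule.schedule_dict[optimizer_config["lr_type"]],
# with lr_type "inv" in the commented-out config. A minimal sketch of what such an
# inverse-decay schedule typically does, assuming the conventional rule
# lr_i = lr * (1 + gamma * iter_num) ** (-power); the repo's own lr_schedule.py may differ.
def inv_lr_scheduler(optimizer, iter_num, gamma, power, lr=0.001, weight_decay=0.0005):
    lr = lr * (1 + gamma * iter_num) ** (-power)
    for param_group in optimizer.param_groups:
        # each parameter group scales the decayed base lr by its own multiplier
        param_group['lr'] = lr * param_group.get('lr_mult', 1)
        param_group['weight_decay'] = weight_decay * param_group.get('decay_mult', 1)
    return optimizer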
def train(config):
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(open(data_config["source"]["list_path"]).readlines(),
                                transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs,
                                        shuffle=True, num_workers=4, drop_last=True)
    dsets["target"] = ImageList(open(data_config["target"]["list_path"]).readlines(),
                                transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs,
                                        shuffle=True, num_workers=4, drop_last=True)

    # if prep_config["test_10crop"]:
    #     for i in range(10):
    #         dsets["test"] = [ImageList(open(data_config["test"]["list_path"]).readlines(), \
    #                             transform=prep_dict["test"][i]) for i in range(10)]
    #         dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs, \
    #                             shuffle=False, num_workers=4) for dset in dsets['test']]
    # else:
    #     dsets["test"] = ImageList(open(data_config["test"]["list_path"]).readlines(), \
    #                             transform=prep_dict["test"])
    #     dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs, \
    #                             shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()

    ## add additional network for some methods
    if config["loss"]["random"]:
        random_layer = network.RandomLayer(
            [base_network.output_num(), class_num], config["loss"]["random_dim"])
        ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(base_network.output_num() * class_num, 1024)
    if config["loss"]["random"]:
        random_layer.cuda()
    ad_net = ad_net.cuda()
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        # if i % config["test_interval"] == config["test_interval"] - 1:
        #     base_network.train(False)
        #     temp_acc = image_classification_test(dset_loaders, \
        #         base_network, test_10crop=prep_config["test_10crop"])
        #     temp_model = nn.Sequential(base_network)
        #     if temp_acc > best_acc:
        #         best_acc = temp_acc
        #         best_model = temp_model
        #     log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
        #     config["out_file"].write(log_str+"\n")
        #     config["out_file"].flush()
        #     print(log_str)
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network),
                       osp.join(config["output_path"], "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]
        ## train one iter
        base_network.train(True)
        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = iter_source.next()
        inputs_target, labels_target = iter_target.next()
        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out = nn.Softmax(dim=1)(outputs)

        if config['method'] == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        else:
            raise ValueError('Method cannot be recognized.')
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)

        if i % 10 == 0:
            print('iter: ', i, 'classifier_loss: ', classifier_loss.data,
                  'transfer_loss: ', transfer_loss.data)

        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss
        total_loss.backward()
        optimizer.step()

    # NOTE: best_model is only assigned inside the evaluation block above, which is
    # commented out here; guard the final save so the function does not raise a NameError.
    if best_acc > 0.0:
        torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
def train_target(args):
    dset_loaders = digit_load(args)
    ## set base network
    if args.dset == 'u2m':
        netF = network.LeNetBase().cuda()
    elif args.dset == 'm2u':
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()

    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir + '/source_F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_C.pt'
    netC.load_state_dict(torch.load(args.modelpath))
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netF.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]
    for k, v in netB.named_parameters():
        param_group += [{'params': v, 'lr': args.lr}]
    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    max_iter = args.max_epoch * len(dset_loaders["target"])
    interval_iter = len(dset_loaders["target"])
    # interval_iter = max_iter // args.interval
    iter_num = 0

    while iter_num < max_iter:
        optimizer.zero_grad()
        try:
            inputs_test, _, tar_idx = iter_test.next()
        except:
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = iter_test.next()

        if inputs_test.size(0) == 1:
            continue

        if iter_num % interval_iter == 0 and args.cls_par > 0:
            netF.eval()
            netB.eval()
            mem_label = obtain_label(dset_loaders['target_te'], netF, netB, netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_test = inputs_test.cuda()
        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)

        if args.cls_par > 0:
            pred = mem_label[tar_idx]
            classifier_loss = args.cls_par * nn.CrossEntropyLoss()(outputs_test, pred)
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(outputs_test)
            entropy_loss = torch.mean(loss.Entropy(softmax_out))
            # if args.gent:
            #     msoftmax = softmax_out.mean(dim=0)
            #     entropy_loss -= torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                args.dset, iter_num, max_iter, acc)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(netF.state_dict(),
                   osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(netB.state_dict(),
                   osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(netC.state_dict(),
                   osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC