def train_fada(config):
    """Run FADA step 1 (source pre-training) and step 2 (DCD training).

    Step 1 trains the feature extractor and the classifier on
    ``config['source_train_loader']`` and prints the accuracy measured on
    ``config['target_test_loader']`` after every epoch.  Step 2 leaves the
    extractor untouched (its features are computed without gradients) and
    trains the pair discriminator ``main_models.DCD`` on the sample pairs
    produced by ``dataloader.sample_groups``.

    Args:
        config: dict of settings.  Keys read here: ``network``,
            ``dilation`` (inception networks only), ``n_flattens``,
            ``n_hiddens``, ``n_class``, ``res_dir``, ``snr``, ``lr``,
            ``n_epoches_1``, ``n_epoches_2``, ``source_train_loader``,
            ``target_train_loader`` and ``target_test_loader``.

    Note:
        ``device`` is not defined in this function and is expected to be a
        module-level name.
    """
    network = config['network']
    if network == 'inceptionv1':
        extractor = InceptionV1(num_classes=32, dilation=config['dilation'])
    elif network == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32, dilation=config['dilation'])
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])
    critic = Critic2(n_flattens=config['n_flattens'],
                     n_hiddens=config['n_hiddens'])
    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()
        critic = critic.cuda()

    loss_class = torch.nn.CrossEntropyLoss()
    loss_domain = torch.nn.CrossEntropyLoss()

    res_dir = os.path.join(config['res_dir'],
                           'snr{}-lr{}'.format(config['snr'], config['lr']))
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(res_dir, exist_ok=True)

    set_log_config(res_dir)
    logging.debug('train_fada')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(critic)
    logging.debug(config)

    optimizer = optim.Adam([{
        'params': extractor.parameters()
    }, {
        'params': classifier.parameters()
    }, {
        'params': critic.parameters()
    }], lr=config['lr'])

    # TODO
    # The discriminator receives features that live on `device`, so it has
    # to be moved there as well.
    discriminator = main_models.DCD(input_features=128).to(device)
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=0.001)

    # ------------- step 1: pre-train g and h on the source samples ---------
    test_loader = config['target_test_loader']
    for epoch in range(config['n_epoches_1']):
        for data, labels in config['source_train_loader']:
            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            y_pred = classifier(extractor(data))
            loss = loss_class(y_pred, labels)
            loss.backward()
            optimizer.step()

        # Accuracy is the mean of the per-batch accuracies.  No gradients
        # are needed for the measurement.
        acc = 0
        with torch.no_grad():
            for data, labels in test_loader:
                data = data.to(device)
                labels = labels.to(device)
                y_test_pred = classifier(extractor(data))
                acc += (torch.max(y_test_pred, 1)[1] == labels).float().mean().item()

        n_test_batches = len(test_loader)
        accuracy = round(acc / float(n_test_batches), 3) if n_test_batches else 0.0
        print("step1----Epoch %d/%d accuracy: %.3f " %
              (epoch + 1, config['n_epoches_1'], accuracy))

    # ------------- step 2: train the DCD ------------------------------------
    mini_batch_size = 40  # training on mini-batches is more stable
    source_loader = config['source_train_loader']
    target_loader = config['target_train_loader']
    len_target_loader = len(target_loader)

    for epoch in range(config['n_epoches_2']):
        iter_target = iter(target_loader)
        # NOTE(review): the loss is averaged over the whole epoch and printed
        # once per epoch; confirm this matches the intended reporting.
        epoch_losses = []

        for i, (data_source, label_source) in enumerate(source_loader, start=1):
            data_target, label_target = next(iter_target)
            # Restart the target loader once it has been consumed so that it
            # can be cycled while iterating over the source loader.
            if i % len_target_loader == 0:
                iter_target = iter(target_loader)

            if torch.cuda.is_available():
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target, label_target = data_target.cuda(), label_target.cuda()

            groups, _ = dataloader.sample_groups(data_source,
                                                 label_source,
                                                 data_target,
                                                 label_target,
                                                 seed=epoch)

            pairs_per_group = len(groups[1])
            # A random order over the 4 * pairs_per_group (group, pair) slots.
            order = torch.randperm(4 * pairs_per_group).tolist()

            X1, X2, ground_truths = [], [], []
            for count, flat_index in enumerate(order, start=1):
                # The group index doubles as the discriminator's class label.
                ground_truth, pair_index = divmod(flat_index, pairs_per_group)
                x1, x2 = groups[ground_truth][pair_index]
                X1.append(x1)
                X2.append(x2)
                ground_truths.append(ground_truth)

                # A full mini-batch has been collected: train on it.
                if count % mini_batch_size == 0:
                    batch_x1 = torch.stack(X1).to(device)
                    batch_x2 = torch.stack(X2).to(device)
                    batch_y = torch.tensor(ground_truths,
                                           dtype=torch.long,
                                           device=device)

                    optimizer_D.zero_grad()
                    # Only the discriminator is optimised here, so the
                    # extractor features are computed without a graph.
                    with torch.no_grad():
                        X_cat = torch.cat(
                            [extractor(batch_x1), extractor(batch_x2)], 1)
                    y_pred = discriminator(X_cat)
                    loss = loss_domain(y_pred, batch_y)
                    loss.backward()
                    optimizer_D.step()

                    epoch_losses.append(loss.item())
                    X1, X2, ground_truths = [], [], []

        # np.mean([]) would emit a RuntimeWarning; report NaN explicitly.
        mean_loss = np.mean(epoch_losses) if epoch_losses else float('nan')
        print("step2----Epoch %d/%d loss:%.3f" %
              (epoch + 1, config['n_epoches_2'], mean_loss))
def train_pada(config):
    """Two-stage training: source pre-training, then constrained adaptation.

    Stage 1 trains ``extractor_s``/``classifier_s`` on the labeled source
    loader for ``config['n_epochs']`` epochs.  Their weights are then copied
    into ``extractor_t``/``classifier_t`` and the source pair is frozen.
    Stage 2 trains the target pair with a classification loss, an
    adversarial loss selected by ``config['models']`` (``'CDAN-E'``,
    ``'CDAN'`` or ``'PADA'``) and an L1 constraint, weighted by
    ``config['pada_cons_w']``, between the target extractor's and the frozen
    source extractor's features of the source batch.

    Args:
        config: dict of settings.  Keys read here: ``network``,
            ``n_flattens``, ``n_hiddens``, ``n_class``, ``random_layer``,
            ``res_dir``, ``normal``, ``pada_cons_w``, ``lr``, ``models``,
            ``target_labeling``, ``n_epochs``, ``TEST_INTERVAL``,
            ``VIS_INTERVAL`` and the loaders ``source_train_loader``,
            ``source_test_loader``, ``target_train_loader``,
            ``target_test_loader``.

    Raises:
        ValueError: if ``config['models']`` is not one of the supported
            adversarial methods (raised during stage 2).
    """
    use_cuda = torch.cuda.is_available()

    network = config['network']
    if network == 'inceptionv1':
        extractor_s = InceptionV1(num_classes=32)
        extractor_t = InceptionV1(num_classes=32)
    elif network == 'inceptionv1s':
        extractor_s = InceptionV1s(num_classes=32)
        extractor_t = InceptionV1s(num_classes=32)
    else:
        extractor_s = Extractor(n_flattens=config['n_flattens'],
                                n_hiddens=config['n_hiddens'])
        extractor_t = Extractor(n_flattens=config['n_flattens'],
                                n_hiddens=config['n_hiddens'])
    classifier_s = Classifier(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              n_class=config['n_class'])
    classifier_t = Classifier(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              n_class=config['n_class'])

    if use_cuda:
        extractor_s = extractor_s.cuda()
        classifier_s = classifier_s.cuda()
        extractor_t = extractor_t.cuda()
        classifier_t = classifier_t.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-cons{}-lr{}'.format(config['normal'], config['network'],
                                         config['pada_cons_w'], config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    print('train_pada')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_pada')
    logging.debug(config)

    if config['models'] == 'PADA':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        # Guarded like every other .cuda() call so CPU-only runs still work.
        if use_cuda:
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer_s = torch.optim.Adam([{
        'params': extractor_s.parameters(),
        'lr': config['lr']
    }, {
        'params': classifier_s.parameters(),
        'lr': config['lr']
    }])
    optimizer_t = torch.optim.Adam([{
        'params': extractor_t.parameters(),
        'lr': config['lr']
    }, {
        'params': classifier_t.parameters(),
        'lr': config['lr']
    }])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    def train_stage1(extractor_s, classifier_s, config, epoch):
        """Stage 1: one epoch of supervised training on the labeled source.

        The two source models are frozen by the caller once stage 1 is done.
        """
        extractor_s.train()
        classifier_s.train()

        ce_loss = nn.CrossEntropyLoss()
        for data_source, label_source in config['source_train_loader']:
            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()

            optimizer_s.zero_grad()

            h_s = extractor_s(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            source_preds = classifier_s(h_s)
            cls_loss = ce_loss(source_preds, label_source)

            cls_loss.backward()
            optimizer_s.step()

    def train(extractor_s, classifier_s, extractor_t, classifier_t, ad_net,
              config, epoch):
        """Stage 2: one epoch of adversarial training of the target models.

        Unlike plain DANN training, every source batch is passed through
        both ``extractor_t`` and the frozen ``extractor_s``; the L1 distance
        between the two feature sets is added to the loss.
        """
        start_epoch = 0

        extractor_t.train()
        classifier_t.train()
        ad_net.train()

        source_loader = config['source_train_loader']
        target_loader = config['target_train_loader']
        num_iter = len(source_loader)
        len_target_loader = len(target_loader)

        # The adversarial term only kicks in after `start_epoch`; gamma
        # depends on the epoch alone, so compute it once.
        adversarial = epoch > start_epoch
        if adversarial:
            gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1

        ce_loss = nn.CrossEntropyLoss()
        softmax = nn.Softmax(dim=1)
        l1_loss = nn.L1Loss()

        iter_target = iter(target_loader)
        for step, (data_source, label_source) in enumerate(source_loader, start=1):
            data_target, label_target = next(iter_target)
            # Restart the target loader once consumed so it can be cycled.
            if step % len_target_loader == 0:
                iter_target = iter(target_loader)

            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target, label_target = data_target.cuda(), label_target.cuda()

            optimizer_t.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor_t(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor_t(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier_t(h_s)
            cls_loss = ce_loss(source_preds, label_source)
            softmax_output_s = softmax(source_preds)

            target_preds = classifier_t(h_t)
            softmax_output_t = softmax(target_preds)
            if config['target_labeling'] == 1:
                cls_loss += ce_loss(target_preds, label_target)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if adversarial:
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch) + step),
                        random_layer)
                elif config['models'] == 'CDAN':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'PADA':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                # Keep d_loss a tensor so that `.item()` below stays valid.
                d_loss = torch.zeros((), device=h_s.device)

            # Constraint loss against the frozen source extractor.  Its
            # output is flattened like h_s so the shapes agree, and no graph
            # is built because its parameters are frozen.
            with torch.no_grad():
                h_s_prev = extractor_s(data_source)
                h_s_prev = h_s_prev.view(h_s_prev.size(0), -1)
            cons_loss = l1_loss(h_s, h_s_prev)

            loss = cls_loss + d_loss + config['pada_cons_w'] * cons_loss
            loss.backward()
            optimizer_t.step()
            if adversarial:
                optimizer_ad.step()

            if step % 20 == 0:
                print(
                    'Train Epoch {} closs {:.6f}, dloss {:.6f}, cons_loss {:.6f}, Loss {:.6f}'
                    .format(epoch, cls_loss.item(), d_loss.item(),
                            cons_loss.item(), loss.item()))

    # ---- stage 1: supervised training of the source models ----------------
    for epoch in range(1, config['n_epochs'] + 1):
        train_stage1(extractor_s, classifier_s, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on source_test_loader')
            test(extractor_s, classifier_s, config['source_test_loader'], epoch)

    # The target models start from the trained source weights; the source
    # models are frozen and only serve as a reference afterwards.
    extractor_t.load_state_dict(extractor_s.state_dict())
    classifier_t.load_state_dict(classifier_s.state_dict())
    for param in extractor_s.parameters():
        param.requires_grad = False
    for param in classifier_s.parameters():
        param.requires_grad = False

    # ---- stage 2: adversarial adaptation of the target models -------------
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor_s, classifier_s, extractor_t, classifier_t, ad_net,
              config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor_t, classifier_t,
                            config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor_t, classifier_t,
                                  config['target_test_loader'], res_dir, epoch,
                                  title)
            draw_tsne(extractor_t,
                      classifier_t,
                      config['source_train_loader'],
                      config['target_test_loader'],
                      res_dir,
                      epoch,
                      title,
                      separate=True)
def train_cnn(config):
    """Train an extractor/classifier pair on the source data with SGD.

    Each epoch obtains its optimizer from ``inv_lr_scheduler`` (applied to a
    Nesterov-momentum SGD optimizer over both networks), trains on
    ``config['source_train_loader']`` and, every ``config['TEST_INTERVAL']``
    epochs, runs ``test`` on the source and target test loaders.

    NOTE(review): another ``train_cnn`` is defined further down in this
    file; if both live in the same module, the later definition replaces
    this one.

    Args:
        config: dict of settings (network choice, layer sizes, ``res_dir``,
            ``normal``, ``lr``, ``n_epochs``, ``TEST_INTERVAL`` and the data
            loaders).
    """
    net_name = config['network']
    if net_name in ('inceptionv1', 'inceptionv1s'):
        backbone = InceptionV1 if net_name == 'inceptionv1' else InceptionV1s
        extractor = backbone(num_classes=32, dilation=config['dilation'])
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    if torch.cuda.is_available():
        extractor, classifier = extractor.cuda(), classifier.cuda()

    run_name = 'normal{}-{}-lr{}'.format(config['normal'], config['network'],
                                         config['lr'])
    res_dir = os.path.join(config['res_dir'], run_name)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    set_log_config(res_dir)
    for item in (extractor, classifier, config):
        logging.debug(item)

    criterion = torch.nn.CrossEntropyLoss()

    trainable = list(extractor.parameters()) + list(classifier.parameters())
    opts = optim.SGD(trainable, lr=config['lr'], nesterov=True, momentum=0.9)

    def train(extractor, classifier, config, epoch):
        """Run one training epoch over the source training loader."""
        extractor.train()
        classifier.train()
        optimizer = inv_lr_scheduler(opts,
                                     epoch,
                                     gamma=0.01,
                                     power=0.75,
                                     lr=config['lr'],
                                     weight_decay=0.0005)

        for features, labels in config['source_train_loader']:
            if torch.cuda.is_available():
                features = features.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            logits = classifier(extractor(features))
            batch_loss = criterion(logits, labels)
            batch_loss.backward()
            optimizer.step()

    last_epoch = config['n_epochs']
    epoch = 1
    while epoch <= last_epoch:
        train(extractor, classifier, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            for loader_key in ('source_test_loader', 'target_test_loader'):
                print('test on ' + loader_key)
                test(extractor, classifier, config[loader_key], epoch)
        epoch += 1
def train_adda(config):
    """Pre-train on the source data, then run adversarial (DANN-style) training.

    ``pretrain`` fits ``model`` on the labeled source loader for 20 epochs;
    ``train`` then runs ``config['n_epochs']`` epochs in which the source
    label loss and the source/target domain losses are minimised jointly.

    NOTE(review): ``model`` is not created anywhere in this function, so it
    has to exist at module level for this code to run.  The ``extractor``
    and ``classifier`` built here are never trained or moved to the GPU by
    this function, yet they are the networks handed to ``test`` and the
    drawing helpers - confirm how they relate to ``model``.

    Args:
        config: dict of settings.  Keys read here: ``network``,
            ``n_flattens``, ``n_hiddens``, ``bn``, ``n_class``, ``lr``,
            ``res_dir``, ``n_epochs``, ``TEST_INTERVAL``, ``VIS_INTERVAL``,
            ``models`` and the loaders ``source_train_loader``,
            ``target_train_loader``, ``source_test_loader``,
            ``target_test_loader``.
    """
    network = config['network']
    if network == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif network == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              bn=config['bn'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    criterion = torch.nn.CrossEntropyLoss()
    loss_class = torch.nn.CrossEntropyLoss()
    loss_domain = torch.nn.CrossEntropyLoss()

    lr = config['lr']
    res_dir = os.path.join(config['res_dir'], 'lr{}'.format(config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    set_log_config(res_dir)
    logging.debug('train_adda')
    logging.debug(config)

    use_cuda = torch.cuda.is_available()

    def pretrain(model, config, pretrain_epochs):
        """Supervised pre-training of ``model`` on the source loader."""
        model.class_classifier.train()
        model.feature.train()

        optimizer = optim.Adam(model.parameters(), lr=lr)

        for _ in range(pretrain_epochs):
            for features, labels in config['source_train_loader']:
                if use_cuda:
                    features, labels = features.cuda(), labels.cuda()

                optimizer.zero_grad()
                preds = model.class_classify(features)
                loss = criterion(preds, labels)
                loss.backward()
                optimizer.step()

    def train(model, config, epoch):
        """One epoch of joint label / domain training."""
        model.class_classifier.train()
        model.feature.train()

        # NOTE(review): a fresh Adam optimizer is built every epoch, which
        # discards its moment estimates between epochs - confirm intended.
        optimizer = optim.Adam(model.parameters(), lr=lr)

        gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1

        source_loader = config['source_train_loader']
        target_loader = config['target_train_loader']
        len_target_loader = len(target_loader)

        iter_target = iter(target_loader)
        # Iterate over every source batch (the previous
        # `range(1, num_iter)` silently skipped the last one).
        for i, (data_source, label_source) in enumerate(source_loader, start=1):
            data_target, _ = next(iter_target)
            # Restart the target loader once consumed so it can be cycled.
            if i % len_target_loader == 0:
                iter_target = iter(target_loader)

            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target = data_target.cuda()

            optimizer.zero_grad()

            # Source batch: label loss plus domain loss (domain label 0).
            class_output_s, domain_output, _ = model.dann(
                input_data=data_source, alpha=gamma)
            err_s_label = loss_class(class_output_s, label_source)
            # Labels are created on the data's device instead of forcing
            # CUDA, so CPU-only runs do not crash.
            domain_label = torch.zeros(data_source.size(0),
                                       dtype=torch.long,
                                       device=data_source.device)
            err_s_domain = loss_domain(domain_output, domain_label)

            # Target batch: domain loss only (domain label 1).
            domain_label = torch.ones(data_target.size(0),
                                      dtype=torch.long,
                                      device=data_target.device)
            _, domain_output, _ = model.dann(input_data=data_target,
                                             alpha=gamma)
            err_t_domain = loss_domain(domain_output, domain_label)

            err = err_s_label + err_s_domain + err_t_domain
            if i % 50 == 0:
                print(
                    'err_s_label {}, err_s_domain {}, gamma {}, err_t_domain {}, total err {}'
                    .format(err_s_label.item(), err_s_domain.item(), gamma,
                            err_t_domain.item(), err.item()))

            err.backward()
            optimizer.step()

    pretrain(model, config, pretrain_epochs=20)
    for epoch in range(1, config['n_epochs'] + 1):
        train(model, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on source_test_loader')
            test(extractor, classifier, config['source_test_loader'], epoch)
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir, epoch,
                                  config['models'])
            draw_tsne(extractor,
                      classifier,
                      config['source_test_loader'],
                      config['target_test_loader'],
                      res_dir,
                      epoch,
                      config['models'],
                      separate=True)
def train_cnn(config):
    """Train an extractor/``Classifier2`` pair on the source data with Adam.

    Every ``config['TEST_INTERVAL']`` epochs the model is passed to ``test``
    for the source and target test loaders; every ``config['VIS_INTERVAL']``
    epochs a confusion matrix and two t-SNE plots (separate and joint) are
    drawn into the result directory.

    NOTE(review): an earlier ``train_cnn`` appears above in this file; if
    both live in the same module, this definition is the one that remains
    bound to the name.

    Args:
        config: dict of settings (network choice, layer sizes, ``res_dir``,
            ``slim``, ``snr``, ``snrp``, ``lr``, ``n_epochs``, ``models``,
            ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the data loaders).
    """
    net_name = config['network']
    if net_name in ('inceptionv1', 'inceptionv1s'):
        backbone = InceptionV1 if net_name == 'inceptionv1' else InceptionV1s
        extractor = backbone(num_classes=32, dilation=config['dilation'])
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier2(n_flattens=config['n_flattens'],
                             n_hiddens=config['n_hiddens'],
                             n_class=config['n_class'])

    if torch.cuda.is_available():
        extractor, classifier = extractor.cuda(), classifier.cuda()

    run_name = 'slim{}-snr{}-snrp{}-lr{}'.format(config['slim'], config['snr'],
                                                 config['snrp'], config['lr'])
    res_dir = os.path.join(config['res_dir'], run_name)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    set_log_config(res_dir)
    for item in (extractor, classifier, config):
        logging.debug(item)

    criterion = torch.nn.CrossEntropyLoss()
    trainable = list(extractor.parameters()) + list(classifier.parameters())
    optimizer = optim.Adam(trainable, lr=config['lr'])

    def train(extractor, classifier, config, epoch):
        """Run one training epoch over the source training loader."""
        extractor.train()
        classifier.train()

        for features, labels in config['source_train_loader']:
            if torch.cuda.is_available():
                features = features.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            # Classifier2 returns a pair; only the first element is trained on.
            outputs = classifier(extractor(features))
            logits, _ = outputs
            batch_loss = criterion(logits, labels)
            batch_loss.backward()
            optimizer.step()

    last_epoch = config['n_epochs']
    epoch = 1
    while epoch <= last_epoch:
        train(extractor, classifier, config, epoch)

        if epoch % config['TEST_INTERVAL'] == 0:
            for loader_key in ('source_test_loader', 'target_test_loader'):
                print('test on ' + loader_key)
                test(extractor, classifier, config[loader_key], epoch)

        if epoch % config['VIS_INTERVAL'] == 0:
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir, epoch,
                                  config['models'])
            for separate in (True, False):
                draw_tsne(extractor,
                          classifier,
                          config['source_test_loader'],
                          config['target_test_loader'],
                          res_dir,
                          epoch,
                          config['models'],
                          separate=separate)
        epoch += 1
def train_dann(config):
    """Train a DANN model with optional MMD / BNM / entropy / VAT / semi losses.

    The base objective is the source label loss plus the source and target
    domain losses computed through a gradient-reversal layer.  Depending on
    the flags in ``config`` further terms are added: ``mmd`` (MMD between
    source and target features), ``bnm``, ``ent`` and ``vat`` (all applied
    to the target class outputs from epoch ``startiter`` on, weighted by
    ``bnm_ew``) and, when ``slim > 0``, a supervised loss on the batches of
    ``target_train_semi_loader``.

    Args:
        config: dict of settings.  Keys read here: ``network``,
            ``dilation`` (inception networks only), ``n_flattens``,
            ``n_hiddens``, ``n_class``, ``res_dir``, ``slim``,
            ``target_labeling``, ``mmd``, ``bnm``, ``vat``, ``ent``,
            ``bnm_ew``, ``bn``, ``batch_size``, ``lr``, ``startiter``,
            ``n_epochs``, ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the
            loaders ``source_train_loader``, ``target_train_loader``,
            ``target_train_semi_loader`` (only if ``slim > 0``) and
            ``target_test_loader``.
    """
    use_cuda = torch.cuda.is_available()

    network = config['network']
    if network == 'inceptionv1':
        extractor = InceptionV1(num_classes=32, dilation=config['dilation'])
    elif network == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32, dilation=config['dilation'])
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier2(n_flattens=config['n_flattens'],
                             n_hiddens=config['n_hiddens'],
                             n_class=config['n_class'])
    critic = Critic2(n_flattens=config['n_flattens'],
                     n_hiddens=config['n_hiddens'])
    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()
        critic = critic.cuda()

    loss_class = torch.nn.CrossEntropyLoss()
    loss_domain = torch.nn.CrossEntropyLoss()

    res_dir = os.path.join(
        config['res_dir'],
        'VIS-slim{}-targetLabel{}-mmd{}-bnm{}-vat{}-ent{}-ew{}-bn{}-bs{}-lr{}'.
        format(config['slim'], config['target_labeling'], config['mmd'],
               config['bnm'], config['vat'], config['ent'], config['bnm_ew'],
               config['bn'], config['batch_size'], config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    set_log_config(res_dir)
    logging.debug('train_dann')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(critic)
    logging.debug(config)

    optimizer = optim.Adam([{
        'params': extractor.parameters()
    }, {
        'params': classifier.parameters()
    }, {
        'params': critic.parameters()
    }], lr=config['lr'])

    vat_loss = VAT(extractor, classifier, n_power=1, radius=3.5)
    # Guarded like the networks above so that CPU-only runs do not crash.
    if use_cuda:
        vat_loss = vat_loss.cuda()

    def dann(input_data, alpha):
        """Return (class output, domain output, flattened feature)."""
        feature = extractor(input_data)
        feature = feature.view(feature.size(0), -1)
        # The critic sees the features through the gradient-reversal layer.
        reverse_feature = ReverseLayerF.apply(feature, alpha)
        class_output, _ = classifier(feature)
        domain_output = critic(reverse_feature)
        return class_output, domain_output, feature

    def train(extractor, classifier, critic, config, epoch):
        """Run one training epoch over the source training loader."""
        extractor.train()
        classifier.train()
        critic.train()

        gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1

        mmd_loss = MMD_loss()

        source_loader = config['source_train_loader']
        target_loader = config['target_train_loader']
        len_target_loader = len(target_loader)
        iter_target = iter(target_loader)

        use_semi = config['slim'] > 0
        if use_semi:
            semi_loader = config['target_train_semi_loader']
            iter_target_semi = iter(semi_loader)
            len_target_semi_loader = len(semi_loader)

        for i, (data_source, label_source) in enumerate(source_loader, start=1):
            data_target, _ = next(iter_target)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                # Restart a loader once consumed so it can be cycled.
                if i % len_target_semi_loader == 0:
                    iter_target_semi = iter(semi_loader)
            if i % len_target_loader == 0:
                iter_target = iter(target_loader)

            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target = data_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            optimizer.zero_grad()

            # Source batch: label loss plus domain loss (domain label 0).
            class_output_s, domain_output, feature_s = dann(
                input_data=data_source, alpha=gamma)
            err_s_label = loss_class(class_output_s, label_source)
            # Labels are created on the data's device instead of forcing CUDA.
            domain_label = torch.zeros(data_source.size(0),
                                       dtype=torch.long,
                                       device=data_source.device)
            err_s_domain = loss_domain(domain_output, domain_label)

            # Target batch: domain loss only (domain label 1).
            domain_label = torch.ones(data_target.size(0),
                                      dtype=torch.long,
                                      device=data_target.device)
            class_output_t, domain_output, feature_t = dann(
                input_data=data_target, alpha=gamma)
            err_t_domain = loss_domain(domain_output, domain_label)

            err = err_s_label + err_s_domain + err_t_domain

            if config['mmd'] == 1:
                err += config['bnm_ew'] * mmd_loss(feature_s, feature_t)

            if config['bnm'] == 1 and epoch >= config['startiter']:
                err_t_bnm = config['bnm_ew'] * get_loss_bnm(class_output_t)
                err += err_t_bnm
                if i == 1:
                    print('epoch {}, loss_t_bnm {:.2f}'.format(
                        epoch, err_t_bnm.item()))

            if config['ent'] == 1 and epoch >= config['startiter']:
                err_t_ent = config['bnm_ew'] * get_loss_entropy(class_output_t)
                err += err_t_ent
                if i == 1:
                    print('epoch {}, loss_t_ent {:.2f}'.format(
                        epoch, err_t_ent.item()))

            if config['vat'] == 1 and epoch >= config['startiter']:
                err_t_vat = config['bnm_ew'] * vat_loss(data_target,
                                                        class_output_t)
                err += err_t_vat
                if i == 1:
                    print('epoch {}, loss_t_vat {:.2f}'.format(
                        epoch, err_t_vat.item()))

            if use_semi:
                feature_target_semi = extractor(data_target_semi)
                feature_target_semi = feature_target_semi.view(
                    feature_target_semi.size(0), -1)
                preds_target_semi, _ = classifier(feature_target_semi)
                err_t_class_semi = loss_class(preds_target_semi,
                                              label_target_semi)
                err += err_t_class_semi
                if i == 1:
                    print('epoch {}, err_t_class_semi {:.2f}'.format(
                        epoch, err_t_class_semi.item()))

            if i == 1:
                print(
                    'epoch {}, err_s_label {:.2f}, err_s_domain {:.2f}, err_t_domain {:.2f}, total err {:.2f}'
                    .format(epoch, err_s_label.item(), err_s_domain.item(),
                            err_t_domain.item(), err.item()))

            err.backward()
            optimizer.step()

    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, critic, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            # Plot title derived from the enabled loss terms.
            title = 'DANN'
            if config['bnm'] == 1 and config['vat'] == 1:
                title = '(b) Proposed'
            elif config['bnm'] == 1:
                title = 'BNM'
            elif config['vat'] == 1:
                title = 'VADA'
            elif config['mmd'] == 1:
                title = 'DCTLN'
            elif config['ent'] == 1:
                title = 'EntMin'
            for separate in (True, False):
                draw_tsne(extractor,
                          classifier,
                          config['source_train_loader'],
                          config['target_test_loader'],
                          res_dir,
                          epoch,
                          title,
                          separate=separate)
def train_dctln(config):
    """Train a DANN-style model with an optional supervised target loss.

    The objective is the source label loss plus the source and target
    domain losses computed through a gradient-reversal layer.  When
    ``config['slim'] > 0`` a label loss on the batches of
    ``config['target_train_semi_loader']`` is added.

    Args:
        config: dict of settings.  Keys read here: ``network``,
            ``dilation`` (inception networks only), ``n_flattens``,
            ``n_hiddens``, ``n_class``, ``res_dir``, ``slim``, ``snr``,
            ``lr``, ``n_epochs``, ``models``, ``TEST_INTERVAL``,
            ``VIS_INTERVAL`` and the loaders ``source_train_loader``,
            ``target_train_loader``, ``target_train_semi_loader`` (only if
            ``slim > 0``) and ``target_test_loader``.
    """
    use_cuda = torch.cuda.is_available()

    network = config['network']
    if network == 'inceptionv1':
        extractor = InceptionV1(num_classes=32, dilation=config['dilation'])
    elif network == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32, dilation=config['dilation'])
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])
    critic = Critic2(n_flattens=config['n_flattens'],
                     n_hiddens=config['n_hiddens'])
    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()
        critic = critic.cuda()

    loss_class = torch.nn.CrossEntropyLoss()
    loss_domain = torch.nn.CrossEntropyLoss()

    res_dir = os.path.join(
        config['res_dir'],
        'slim{}-snr{}-lr{}'.format(config['slim'], config['snr'],
                                   config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    set_log_config(res_dir)
    logging.debug('train_dctln')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(critic)
    logging.debug(config)

    optimizer = optim.Adam([{
        'params': extractor.parameters()
    }, {
        'params': classifier.parameters()
    }, {
        'params': critic.parameters()
    }], lr=config['lr'])

    def dann(input_data, alpha):
        """Return (class output, domain output, flattened feature)."""
        feature = extractor(input_data)
        feature = feature.view(feature.size(0), -1)
        # The critic sees the features through the gradient-reversal layer.
        reverse_feature = ReverseLayerF.apply(feature, alpha)
        class_output = classifier(feature)
        domain_output = critic(reverse_feature)
        return class_output, domain_output, feature

    def train(extractor, classifier, critic, config, epoch):
        """Run one training epoch over the source training loader."""
        extractor.train()
        classifier.train()
        critic.train()

        gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1

        source_loader = config['source_train_loader']
        target_loader = config['target_train_loader']
        len_target_loader = len(target_loader)
        iter_target = iter(target_loader)

        # The semi-supervised loader is only set up - and only used - when
        # `slim` is positive; using it unconditionally raised a NameError
        # for slim == 0.
        use_semi = config['slim'] > 0
        if use_semi:
            semi_loader = config['target_train_semi_loader']
            iter_target_semi = iter(semi_loader)
            len_target_semi_loader = len(semi_loader)

        for i, (data_source, label_source) in enumerate(source_loader, start=1):
            data_target, _ = next(iter_target)
            # Restart a loader once consumed so it can be cycled.
            if i % len_target_loader == 0:
                iter_target = iter(target_loader)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                if i % len_target_semi_loader == 0:
                    iter_target_semi = iter(semi_loader)

            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target = data_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            optimizer.zero_grad()

            # Source batch: label loss plus domain loss (domain label 0).
            class_output_s, domain_output, _ = dann(input_data=data_source,
                                                    alpha=gamma)
            err_s_label = loss_class(class_output_s, label_source)
            # Labels are created on the data's device instead of forcing CUDA.
            domain_label = torch.zeros(data_source.size(0),
                                       dtype=torch.long,
                                       device=data_source.device)
            err_s_domain = loss_domain(domain_output, domain_label)

            # Target batch: domain loss only (domain label 1).
            domain_label = torch.ones(data_target.size(0),
                                      dtype=torch.long,
                                      device=data_target.device)
            _, domain_output, _ = dann(input_data=data_target, alpha=gamma)
            err_t_domain = loss_domain(domain_output, domain_label)

            err = 1.0 * err_s_label + err_s_domain + err_t_domain

            if use_semi:
                # Supervised loss on the labeled target batch.
                class_output_semi_t, _, _ = dann(input_data=data_target_semi,
                                                 alpha=gamma)
                err_t_label = loss_class(class_output_semi_t,
                                         label_target_semi)
                err = err + err_t_label

            err.backward()
            optimizer.step()

    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, critic, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir, epoch,
                                  config['models'])
            draw_tsne(extractor,
                      classifier,
                      config['source_train_loader'],
                      config['target_test_loader'],
                      res_dir,
                      epoch,
                      config['models'],
                      separate=True)
def train_cdan(config):
    """Train, or in test-only mode evaluate, a DANN / CDAN / CDAN-E model.

    Builds a feature extractor and a classifier from ``config``, plus an
    adversarial network whose input size depends on ``config['models']`` and
    ``config['random_layer']``. When ``config['testonly'] == 0`` the model is
    trained for ``config['n_epochs']`` epochs; the weights with the best
    target-test accuracy are written to ``extractor.pth``, ``classifier.pth``
    and ``adnet.pth`` inside the result directory. Otherwise the saved
    weights are loaded (if all three files exist) and evaluated once.

    Args:
        config: dict of hyper-parameters and data loaders. Keys read here
            include ``network``, ``n_flattens``, ``n_hiddens``, ``n_class``,
            ``bn``, ``random_layer``, ``res_dir``, ``slim``,
            ``target_labeling``, ``snr``, ``snrp``, ``lr``, ``models``,
            ``n_epochs``, ``testonly``, ``TEST_INTERVAL``, ``VIS_INTERVAL``,
            ``source_train_loader``, ``target_train_loader``,
            ``target_test_loader`` and, when ``slim > 0``,
            ``target_train_semi_loader``.

    Raises:
        ValueError: if ``config['models']`` is not one of ``'CDAN-E'``,
            ``'CDAN'`` or ``'DANN'`` once adversarial training is active.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              bn=config['bn'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    # NOTE(review): .cuda() is called unconditionally here and on the
    # adversarial network below, so this routine effectively requires a GPU.
    vat_loss = VAT(extractor, classifier, n_power=1, radius=3.5).cuda()

    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'slim{}-targetLabel{}-snr{}-snrp{}-lr{}'.format(
            config['slim'], config['target_labeling'], config['snr'],
            config['snrp'], config['lr']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    print('train_cdan')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # The discriminator input differs per method: raw features for DANN, a
    # randomized projection for CDAN with a random layer, and otherwise an
    # input of size n_flattens * n_class.
    if config['models'] == 'DANN':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']},
    ])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one training epoch; one step per source-loader batch."""
        start_epoch = 0
        extractor.train()
        classifier.train()
        ad_net.train()

        use_semi = config['slim'] > 0
        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        if use_semi:
            iter_target_semi = iter(config['target_train_semi_loader'])
            len_target_semi_loader = len(config['target_train_semi_loader'])

        num_iter = len_source_loader
        for step in range(1, num_iter + 1):
            # Built-in next() instead of the iterator's .next() method, which
            # is not part of the Python 3 iterator protocol.
            data_source, label_source = next(iter_source)
            data_target, label_target = next(iter_target)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                # Restart the (possibly shorter) semi-supervised loader
                # once it has been fully consumed.
                if step % len_target_semi_loader == 0:
                    iter_target_semi = iter(
                        config['target_train_semi_loader'])
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])

            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()
                label_target = label_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)

            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)
            if config['target_labeling'] == 1:
                # Optionally supervise on the target labels as well.
                cls_loss += nn.CrossEntropyLoss()(target_preds, label_target)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if epoch > start_epoch:
                # Ramps from 0 towards 1 as epoch approaches n_epochs.
                gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(
                            num_iter * (epoch - start_epoch) + step),
                        random_layer)
                elif config['models'] == 'CDAN':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'DANN':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            loss = cls_loss + d_loss

            # Extra regularizers; their definitions live outside this block.
            err_t_bnm = get_loss_bnm(target_preds)
            err_s_vat = vat_loss(data_source, source_preds)
            err_t_vat = vat_loss(data_target, target_preds)
            loss += 1.0 * err_s_vat + 1.0 * err_t_vat + 1.0 * err_t_bnm

            if use_semi:
                feature_target_semi = extractor(data_target_semi)
                feature_target_semi = feature_target_semi.view(
                    feature_target_semi.size(0), -1)
                preds_target_semi = classifier(feature_target_semi)
                loss += nn.CrossEntropyLoss()(preds_target_semi,
                                              label_target_semi)

            loss.backward()
            optimizer.step()
            if epoch > start_epoch:
                optimizer_ad.step()

            if (step) % 100 == 0:
                # float() handles both a tensor loss and the plain int 0
                # assigned on the warm-up branch above.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, Loss {:.6f}'.format(
                    epoch, cls_loss.item(), float(d_loss), loss.item()))

    if config['testonly'] == 0:
        best_accuracy = 0
        best_model_index = -1
        for epoch in range(1, config['n_epochs'] + 1):
            train(extractor, classifier, ad_net, config, epoch)
            if epoch % config['TEST_INTERVAL'] == 0:
                print('test on target_test_loader')
                accuracy = test(extractor, classifier,
                                config['target_test_loader'], epoch)
                if accuracy > best_accuracy:
                    # Checkpoint only when target accuracy improves.
                    best_accuracy = accuracy
                    best_model_index = epoch
                    torch.save(extractor.state_dict(), extractor_path)
                    torch.save(classifier.state_dict(), classifier_path)
                    torch.save(ad_net.state_dict(), adnet_path)
                print('epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.format(
                    epoch, accuracy, best_accuracy, best_model_index))

            if epoch % config['VIS_INTERVAL'] == 0:
                title = config['models']
                draw_confusion_matrix(extractor, classifier,
                                      config['target_test_loader'], res_dir,
                                      epoch, title)
                draw_tsne(extractor, classifier,
                          config['source_train_loader'],
                          config['target_test_loader'], res_dir, epoch, title,
                          separate=True)
    else:
        if (os.path.exists(extractor_path)
                and os.path.exists(classifier_path)
                and os.path.exists(adnet_path)):
            extractor.load_state_dict(torch.load(extractor_path))
            classifier.load_state_dict(torch.load(classifier_path))
            ad_net.load_state_dict(torch.load(adnet_path))
            print('Test only mode, model loaded')

            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], -1)

            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir, -1,
                                  title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, -1, title,
                      separate=True)
        else:
            print('no saved model found')
def train_deepcoral(config):
    """Train an extractor/classifier pair with a CORAL alignment loss.

    Each step minimizes the source cross-entropy plus
    ``config['mmd_gamma'] * CORAL(source_features, target_features)``, and,
    when ``config['slim'] > 0``, an additional cross-entropy on labelled
    target batches from ``config['target_train_semi_loader']``. Evaluation
    and visualization run every ``TEST_INTERVAL`` / ``VIS_INTERVAL`` epochs.

    Args:
        config: dict of hyper-parameters and data loaders. Keys read here
            include ``network``, ``dilation``, ``n_flattens``, ``n_hiddens``,
            ``n_class``, ``res_dir``, ``normal``, ``lr``, ``mmd_gamma``,
            ``slim``, ``n_epochs``, ``TEST_INTERVAL``, ``VIS_INTERVAL``,
            ``models`` and the source/target train/test loaders.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32, dilation=config['dilation'])
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32, dilation=config['dilation'])
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-dilation{}-lr{}-mmdgamma{}'.format(
            config['normal'], config['network'], config['dilation'],
            config['lr'], config['mmd_gamma']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    criterion = torch.nn.CrossEntropyLoss()

    set_log_config(res_dir)
    logging.debug('train_deepcoral')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    optimizer = optim.Adam(
        list(extractor.parameters()) + list(classifier.parameters()),
        lr=config['lr'])

    def train(extractor, classifier, config, epoch):
        """Run one training epoch; one step per source-loader batch."""
        extractor.train()
        classifier.train()

        use_semi = config['slim'] > 0
        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        if use_semi:
            iter_target_semi = iter(config['target_train_semi_loader'])
            len_target_semi_loader = len(config['target_train_semi_loader'])

        num_iter = len_source_loader
        for i in range(1, num_iter + 1):
            # Built-in next() instead of the iterator's .next() method, which
            # is not part of the Python 3 iterator protocol.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                # Restart the semi-supervised loader once exhausted.
                if i % len_target_semi_loader == 0:
                    iter_target_semi = iter(
                        config['target_train_semi_loader'])
            if i % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])

            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            optimizer.zero_grad()

            source = extractor(data_source)
            source = source.view(source.size(0), -1)
            target = extractor(data_target)
            target = target.view(target.size(0), -1)

            preds = classifier(source)
            loss_cls = criterion(preds, label_source)
            loss_coral = CORAL(source, target)

            # Fixed trade-off weight taken from the config.
            loss = loss_cls + config['mmd_gamma'] * loss_coral

            if use_semi:
                feature_target_semi = extractor(data_target_semi)
                feature_target_semi = feature_target_semi.view(
                    feature_target_semi.size(0), -1)
                preds_target_semi = classifier(feature_target_semi)
                err_t_class_semi = criterion(preds_target_semi,
                                             label_target_semi)
                loss += err_t_class_semi

            if i % 50 == 0:
                print('loss_cls {}, loss_coral {}, gamma {}, total loss {}'.
                      format(loss_cls.item(), loss_coral.item(),
                             config['mmd_gamma'], loss.item()))

            loss.backward()
            optimizer.step()

    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on source_test_loader')
            test(extractor, classifier, config['source_test_loader'], epoch)
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, config['models'])
            draw_tsne(extractor, classifier, config['source_test_loader'],
                      config['target_test_loader'], res_dir, epoch,
                      config['models'], separate=True)
            draw_tsne(extractor, classifier, config['source_test_loader'],
                      config['target_test_loader'], res_dir, epoch,
                      config['models'], separate=False)
def train_tcl_vat(config):
    """Train an extractor/classifier with a sample-reweighted adversarial loss.

    The total loss per step is ``Ly + traded * Ld + tradet * Lt`` where
    ``Ly`` is the source classification loss, ``Ld`` a binary cross-entropy
    domain loss and ``Lt`` the entropy of the target predictions. From epoch
    ``config['startiter']`` on, source samples whose combined loss exceeds a
    threshold get weight 0 in both ``Ly`` and ``Ld``. The weights with the
    best target-test accuracy are saved in the result directory.

    Args:
        config: dict of hyper-parameters and data loaders. Keys read here
            include ``network``, ``n_flattens``, ``n_hiddens``, ``n_class``,
            ``res_dir``, ``slim``, ``snr``, ``snrp``, ``Lythred``,
            ``Ldthred``, ``lambdad``, ``lr``, ``batch_size``, ``startiter``,
            ``traded``, ``tradet``, ``n_epochs``, ``TEST_INTERVAL``,
            ``VIS_INTERVAL``, ``models`` and the source/target loaders.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    res_dir = os.path.join(
        config['res_dir'],
        'slim{}-snr{}-snrp{}-Lythred{}-Ldthred{}-lambdad{}-lr{}'.format(
            config['slim'], config['snr'], config['snrp'], config['Lythred'],
            config['Ldthred'], config['lambdad'], config['lr']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    print('train_tcl')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_tcl')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # NOTE(review): .cuda() is unconditional here and in the helpers below,
    # so this routine effectively requires a GPU.
    ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    ad_net = ad_net.cuda()
    optimizer = torch.optim.Adam([{
        'params': extractor.parameters(),
        'lr': config['lr']
    }, {
        'params': classifier.parameters(),
        'lr': config['lr']
    }], weight_decay=0.0001)
    optimizer_ad = torch.optim.Adam(ad_net.parameters(),
                                    lr=config['lr'],
                                    weight_decay=0.0001)
    print(ad_net)

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def cal_Ly(source_y_softmax, source_d, label):
        """Return the weighted source classification loss and its weights.

        Args:
            source_y_softmax: softmax class predictions for the source batch.
            source_d: domain predictions for the source batch.
            label: ground-truth class labels of the source batch.

        Returns:
            ``(Ly, source_weight, source_num)``: the mean of the per-sample
            loss multiplied by the 0/1 weights, the weight tensor, and the
            number of samples with weight 1 as a float.
        """
        # Threshold on the combined (class + lambdad * domain) loss.
        agey = -math.log(config['Lythred'])
        aged = -math.log(1.0 - config['Ldthred'])
        age = agey + config['lambdad'] * aged

        y_softmax = source_y_softmax
        # NOTE(review): indexing with config['batch_size'] assumes every
        # batch is full-sized — confirm the loaders drop the last batch.
        the_index = torch.LongTensor(np.array(range(
            config['batch_size']))).cuda()
        # For each sample, pick the softmax value at its true label index.
        y_label = y_softmax[the_index, label]
        y_loss = -torch.log(y_label + 1e-8)

        d_loss = -torch.log(1.0 - source_d)
        d_loss = d_loss.view(config['batch_size'])

        weight_loss = y_loss + config['lambdad'] * d_loss

        # Samples below the threshold keep weight 1, the rest get 0; the
        # weights are detached so they act as constants in backprop.
        weight_var = (weight_loss < age).float().detach()
        Ly = torch.mean(y_loss * weight_var)

        source_weight = weight_var.data.clone()
        source_num = float((torch.sum(source_weight)))
        return Ly, source_weight, source_num

    def cal_Lt(target_y_softmax):
        """Return the mean entropy of the target softmax predictions."""
        Gt_var = target_y_softmax
        Gt_en = -torch.sum((Gt_var * torch.log(Gt_var + 1e-8)), 1)
        Lt = torch.mean(Gt_en)
        return Lt

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one training epoch; one step per source-loader batch."""
        start_epoch = 0
        extractor.train()
        classifier.train()
        ad_net.train()

        use_semi = config['slim'] > 0
        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len_source_loader
        if use_semi:
            iter_target_semi = iter(config['target_train_semi_loader'])
            len_target_semi_loader = len(config['target_train_semi_loader'])

        for step in range(1, num_iter + 1):
            # Built-in next() instead of the iterator's .next() method, which
            # is not part of the Python 3 iterator protocol.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                # Restart the semi-supervised loader once exhausted.
                if step % len_target_semi_loader == 0:
                    iter_target_semi = iter(
                        config['target_train_semi_loader'])
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])

            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            # Domain labels: 1 for source rows, 0 for target rows.
            source_domain_label = torch.FloatTensor(config['batch_size'], 1)
            target_domain_label = torch.FloatTensor(config['batch_size'], 1)
            source_domain_label.fill_(1)
            target_domain_label.fill_(0)
            domain_label = torch.cat(
                [source_domain_label, target_domain_label], 0)
            domain_label = domain_label.cuda()

            # Source and target are pushed through the networks as a single
            # concatenated batch and split back into halves afterwards.
            inputs = torch.cat([data_source, data_target], 0)
            features = extractor(inputs)
            gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
            y_var = classifier(features)
            features = features.view(features.size(0), -1)
            d_var = ad_net(features, gamma)
            y_softmax_var = nn.Softmax(dim=1)(y_var)
            source_y, target_y = y_var.chunk(2, 0)
            source_y_softmax, target_y_softmax = y_softmax_var.chunk(2, 0)
            source_d, target_d = d_var.chunk(2, 0)

            # Ly: source classification loss.
            if epoch < config['startiter']:
                # Warm-up: plain cross-entropy without sample weighting.
                Ly = nn.CrossEntropyLoss()(source_y, label_source)
            else:
                Ly, source_weight, source_num = cal_Ly(
                    source_y_softmax, source_d, label_source)
                target_weight = torch.ones(source_weight.size()).cuda()

            # Lt: entropy of the target class predictions.
            Lt = cal_Lt(target_y_softmax)

            # Ld: domain loss, weighted per sample after the warm-up.
            if epoch < config['startiter']:
                Ld = nn.BCELoss()(d_var, domain_label)
            else:
                domain_weight = torch.cat([source_weight, target_weight], 0)
                domain_weight = domain_weight.view(-1, 1)
                domain_criterion = nn.BCELoss(weight=domain_weight).cuda()
                Ld = domain_criterion(d_var, domain_label)

            loss = Ly + config['traded'] * Ld + config['tradet'] * Lt

            if use_semi:
                feature_target_semi = extractor(data_target_semi)
                feature_target_semi = feature_target_semi.view(
                    feature_target_semi.size(0), -1)
                preds_target_semi = classifier(feature_target_semi)
                loss += nn.CrossEntropyLoss()(preds_target_semi,
                                              label_target_semi)

            optimizer.zero_grad()
            optimizer_ad.zero_grad()
            loss.backward()
            optimizer.step()
            optimizer_ad.step()

    best_accuracy = 0
    best_model_index = -1
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, ad_net, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor, classifier,
                            config['target_test_loader'], epoch)
            if accuracy > best_accuracy:
                # Checkpoint only when target accuracy improves.
                best_accuracy = accuracy
                best_model_index = epoch
                torch.save(extractor.state_dict(), extractor_path)
                torch.save(classifier.state_dict(), classifier_path)
                torch.save(ad_net.state_dict(), adnet_path)
            print(
                'epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.
                format(epoch, accuracy, best_accuracy, best_model_index))

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=True)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=False)
def train_cdan_vat(config):
    """Train a DANN/CDAN model with entropy and VAT regularization.

    Each step combines the (optionally importance-weighted) source
    classification loss, the adversarial domain loss, the target entropy
    loss and virtual-adversarial losses on source and target batches. When
    ``config['iw'] > 0`` the per-sample source loss is multiplied by a weight
    derived from the discriminator output, ``1 - |0.5 - d| ** iw``.

    Args:
        config: dict of hyper-parameters and data loaders. Keys read here
            include ``network``, ``n_flattens``, ``n_hiddens``, ``n_class``,
            ``bn``, ``random_layer``, ``res_dir``, ``normal``, ``dilation``,
            ``lr``, ``models``, ``iw``, ``target_labeling``, ``n_epochs``,
            ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the source/target loaders.

    Raises:
        ValueError: if ``config['models']`` is not one of ``'CDAN-E'``,
            ``'CDAN_VAT'`` or ``'DANN_VAT'``.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              bn=config['bn'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-dilation{}-lr{}'.format(config['normal'],
                                             config['network'],
                                             config['dilation'],
                                             config['lr']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    print('train_cdan_vat')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan_vat')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # NOTE(review): .cuda() is unconditional on the adversarial network, the
    # random layer and the VAT module, so a GPU is effectively required.
    if config['models'] == 'DANN_VAT':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([{
        'params': extractor.parameters(),
        'lr': config['lr']
    }, {
        'params': classifier.parameters(),
        'lr': config['lr']
    }])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    vat_loss = VAT(extractor, classifier, n_power=1, radius=3.5).cuda()

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one training epoch; one step per source-loader batch."""
        start_epoch = 0
        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])

        num_iter = len_source_loader
        for step in range(1, num_iter + 1):
            # Built-in next() instead of the iterator's .next() method, which
            # is not part of the Python 3 iterator protocol.
            data_source, label_source = next(iter_source)
            data_target, label_target = next(iter_target)
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])

            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()
                label_target = label_target.cuda()

            # Importance weights for the source samples, computed from the
            # current discriminator without tracking gradients.
            with torch.no_grad():
                if 'CDAN' in config['models']:
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    source_preds = classifier(h_s)
                    softmax_output_s = nn.Softmax(dim=1)(source_preds)
                    # Per-sample outer product of predictions and features.
                    op_out = torch.bmm(softmax_output_s.unsqueeze(2),
                                       h_s.unsqueeze(1))
                    gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                    ad_out = ad_net(op_out.view(
                        -1, softmax_output_s.size(1) * h_s.size(1)),
                        gamma, training=False)
                    dom_entropy = 1 - (torch.abs(0.5 - ad_out))**config['iw']
                    dom_weight = dom_entropy
                elif 'DANN' in config['models']:
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                    ad_out = ad_net(h_s, gamma, training=False)
                    dom_entropy = 1 - (torch.abs(0.5 - ad_out))**config['iw']
                    dom_weight = dom_entropy

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)
            if config['iw'] > 0:
                cls_loss = nn.CrossEntropyLoss(reduction='none')(
                    source_preds, label_source)
                # Flatten the weight so it lines up element-wise with the
                # per-sample loss. Presumably ad_net returns (batch, 1); left
                # unflattened that would broadcast against the (batch,) loss
                # into a (batch, batch) matrix — TODO confirm ad_net's shape.
                cls_loss = torch.mean(dom_weight.view(-1) * cls_loss)
            else:
                cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)

            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)
            if config['target_labeling'] == 1:
                # Optionally supervise on the target labels as well.
                cls_loss += nn.CrossEntropyLoss()(target_preds, label_target)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if epoch > start_epoch:
                gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch) +
                                             step), random_layer)
                elif config['models'] == 'CDAN_VAT':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'DANN_VAT':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            # Target entropy loss.
            err_t_entropy = get_loss_entropy(softmax_output_t)

            # Virtual adversarial losses on both domains.
            err_s_vat = vat_loss(data_source, source_preds)
            err_t_vat = vat_loss(data_target, target_preds)

            loss = cls_loss + d_loss + err_t_entropy + err_s_vat + err_t_vat

            loss.backward()
            optimizer.step()
            if epoch > start_epoch:
                optimizer_ad.step()
            if (step) % 20 == 0:
                # float() handles both a tensor loss and the plain int 0
                # assigned on the warm-up branch above.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, Loss {:.6f}'.
                      format(epoch, cls_loss.item(), float(d_loss),
                             loss.item()))

    best_accuracy = 0
    best_model_index = -1
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, ad_net, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor, classifier,
                            config['target_test_loader'], epoch)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_index = epoch
            print(
                'epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.
                format(epoch, accuracy, best_accuracy, best_model_index))

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=True)
            draw_tsne(extractor, classifier, config['source_test_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=False)
def train_mcd_a(config):
    """Train a generator and two classifiers by classifier discrepancy.

    ``G`` is the feature extractor; ``C1`` and ``C2`` are two classifiers
    built with identical arguments. Depending on ``config['mcd_onestep']``
    each epoch runs either the three-stage procedure in ``train`` or the
    single-pass variant in ``train_onestep``.

    Args:
        config: dict of hyper-parameters and data loaders. Keys read here
            include ``network``, ``dilation``, ``n_flattens``, ``n_hiddens``,
            ``n_class``, ``lr``, ``res_dir``, ``normal``, ``mcd_vat``,
            ``mcd_entropy``, ``mcd_onestep``, ``n_epochs``,
            ``TEST_INTERVAL``, ``VIS_INTERVAL``, ``models`` and the
            source/target loaders.
    """
    if config['network'] == 'inceptionv1':
        G = InceptionV1(num_classes=32, dilation=config['dilation'])
    elif config['network'] == 'inceptionv1s':
        G = InceptionV1s(num_classes=32, dilation=config['dilation'])
    else:
        G = Extractor(n_flattens=config['n_flattens'],
                      n_hiddens=config['n_hiddens'])
    C1 = Classifier(n_flattens=config['n_flattens'],
                    n_hiddens=config['n_hiddens'],
                    n_class=config['n_class'])
    C2 = Classifier(n_flattens=config['n_flattens'],
                    n_hiddens=config['n_hiddens'],
                    n_class=config['n_class'])
    if torch.cuda.is_available():
        G = G.cuda()
        C1 = C1.cuda()
        C2 = C2.cuda()

    opt_g = optim.Adam(G.parameters(), lr=config['lr'])
    opt_c1 = optim.Adam(C1.parameters(), lr=config['lr'])
    opt_c2 = optim.Adam(C2.parameters(), lr=config['lr'])

    criterion = torch.nn.CrossEntropyLoss()
    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-lr{}'.format(config['normal'], config['network'],
                                  config['lr']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    set_log_config(res_dir)
    logging.debug('train_mcd')
    logging.debug(G)
    logging.debug(C1)
    logging.debug(C2)
    logging.debug(config)

    def zero_all_grads():
        """Clear the gradients held by all three optimizers."""
        opt_g.zero_grad()
        opt_c1.zero_grad()
        opt_c2.zero_grad()

    def train(G, C1, C2, config, epoch):
        """Run one epoch of the three-stage procedure."""
        G.train()
        C1.train()
        C2.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len_source_loader
        for i in range(1, num_iter + 1):
            # Built-in next() instead of the iterator's .next() method, which
            # is not part of the Python 3 iterator protocol.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            if i % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()

            # Steps are numbered from 1, so the per-epoch log lines fire on
            # the first batch.
            log_this_step = i == 1

            # Step 1: update G, C1 and C2 on the source classification loss
            # (plus optional VAT / entropy terms). G's gradients are cleared
            # too so that nothing left over from step 3 of the previous
            # iteration leaks into this update.
            zero_all_grads()

            feat_s = G(data_source)
            feat_t = G(data_target)
            output_s1 = C1(feat_s)
            output_s2 = C2(feat_s)
            loss_s1 = criterion(output_s1, label_source)
            loss_s2 = criterion(output_s2, label_source)
            loss = loss_s1 + loss_s2
            if log_this_step:
                print('Epoch: {} Stage1, Loss1: {:.6f}, Loss2: {:.6f}'.format(
                    epoch, loss_s1.item(), loss_s2.item()))

            if config['mcd_vat']:
                # VAT losses; gradients are cleared after every vat() call,
                # as in the original sequence.
                source_vat_c1 = vat(C1, feat_s, 0.5)
                zero_all_grads()
                target_vat_c1 = vat(C1, feat_t, 0.5)
                zero_all_grads()
                source_vat_c2 = vat(C2, feat_s, 0.5)
                zero_all_grads()
                target_vat_c2 = vat(C2, feat_t, 0.5)
                zero_all_grads()

                source_vat_loss1 = get_loss_vat(C1, feat_s, source_vat_c1)
                source_vat_loss2 = get_loss_vat(C2, feat_s, source_vat_c2)
                target_vat_loss1 = get_loss_vat(C1, feat_t, target_vat_c1)
                target_vat_loss2 = get_loss_vat(C2, feat_t, target_vat_c2)
                loss += source_vat_loss1 + source_vat_loss2 + target_vat_loss1 + target_vat_loss2
                if log_this_step:
                    print(
                        'Epoch: {} Stage1, source_vat_loss1: {:.6f}, source_vat_loss2: {:.6f}, target_vat_loss1: {:.6f}, target_vat_loss2: {:.6f}'
                        .format(epoch, source_vat_loss1.item(),
                                source_vat_loss2.item(),
                                target_vat_loss1.item(),
                                target_vat_loss2.item()))

            if config['mcd_entropy'] == 1:
                # NOTE(review): get_loss_entropy is called here with
                # (classifier, features) but with a single softmax tensor in
                # train_cdan_vat — confirm the helper accepts both forms.
                entropy_loss_c1 = get_loss_entropy(C1, feat_t)
                entropy_loss_c2 = get_loss_entropy(C2, feat_t)
                loss += entropy_loss_c1 + entropy_loss_c2
                if log_this_step:
                    print(
                        'Epoch: {} Stage1, entropy c1: {:.6f}, entropy c2: {:.6f}'
                        .format(epoch, entropy_loss_c1.item(),
                                entropy_loss_c2.item()))

            loss.backward()
            opt_g.step()
            opt_c1.step()
            opt_c2.step()

            # Step 2: update only the classifiers; source classification
            # loss minus the source and target discrepancies.
            zero_all_grads()

            feat_s = G(data_source)
            output_s1 = C1(feat_s)
            output_s2 = C2(feat_s)
            loss_dis_s = discrepancy(output_s1, output_s2)

            feat_t = G(data_target)
            output_t1 = C1(feat_t)
            output_t2 = C2(feat_t)
            loss_dis_t = discrepancy(output_t1, output_t2)

            loss_s1 = criterion(output_s1, label_source)
            loss_s2 = criterion(output_s2, label_source)

            loss = loss_s1 + loss_s2 - loss_dis_t - loss_dis_s
            loss.backward()
            opt_c1.step()
            opt_c2.step()

            if log_this_step:
                print(
                    'Epoch: {} Stage2, Loss1: {:.6f}, Loss2: {:.6f} Disc S: {:.6f}, Disc T: {:.6f}'
                    .format(epoch, loss_s1.item(), loss_s2.item(),
                            loss_dis_s.item(), loss_dis_t.item()))

            # Step 3: update only the feature extractor; source
            # classification loss plus both discrepancies.
            for _ in range(1):
                zero_all_grads()

                feat_s = G(data_source)
                output_s1 = C1(feat_s)
                output_s2 = C2(feat_s)
                loss_dis_s = discrepancy(output_s1, output_s2)

                feat_t = G(data_target)
                output_t1 = C1(feat_t)
                output_t2 = C2(feat_t)
                loss_dis_t = discrepancy(output_t1, output_t2)

                loss_s1 = criterion(output_s1, label_source)
                loss_s2 = criterion(output_s2, label_source)

                loss = loss_s1 + loss_s2 + loss_dis_s + loss_dis_t
                loss.backward()
                opt_g.step()

            if log_this_step:
                print(
                    'Epoch: {} Stage3, Loss1: {:.6f}\t Loss2: {:.6f}\t Discrepancy S: {:.6f}\t Discrepancy T: {:.6f}'
                    .format(epoch, loss_s1.item(), loss_s2.item(),
                            loss_dis_s.item(), loss_dis_t.item()))

    def train_onestep(G, C1, C2, config, epoch):
        """Run one epoch of the single-pass variant with gradient reversal."""
        # NOTE(review): .cuda() is unconditional here.
        criterion = nn.CrossEntropyLoss().cuda()
        G.train()
        C1.train()
        C2.train()

        gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len_source_loader
        for i in range(1, num_iter + 1):
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            if i % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()

            zero_all_grads()

            set_requires_grad(G, requires_grad=True)
            set_requires_grad(C1, requires_grad=True)
            set_requires_grad(C2, requires_grad=True)
            feat_s = G(data_source)
            output_s1 = C1(feat_s)
            output_s2 = C2(feat_s)
            loss_s1 = criterion(output_s1, label_source)
            loss_s2 = criterion(output_s2, label_source)
            loss_s = loss_s1 + loss_s2

            set_requires_grad(G, requires_grad=False)
            set_requires_grad(C1, requires_grad=True)
            set_requires_grad(C2, requires_grad=True)
            # Target features are computed without tracking gradients.
            with torch.no_grad():
                feat_t = G(data_target)
            reverse_feature_t = ReverseLayerF.apply(feat_t, gamma)
            output_t1 = C1(reverse_feature_t)
            output_t2 = C2(reverse_feature_t)
            loss_dis = -discrepancy(output_t1, output_t2)

            loss = loss_s + loss_dis
            loss.backward()
            opt_c1.step()
            opt_c2.step()
            opt_g.step()

            if i % 20 == 0:
                print(
                    'Train Epoch: {}, Loss1: {:.6f}\t Loss2: {:.6f}\t Discrepancy: {:.6f}'
                    .format(epoch, loss_s1.item(), loss_s2.item(),
                            loss_dis.item()))

    for epoch in range(1, config['n_epochs'] + 1):
        if config['mcd_onestep'] == 1:
            train_onestep(G, C1, C2, config, epoch)
        else:
            train(G, C1, C2, config, epoch)

        if epoch % config['TEST_INTERVAL'] == 0:
            print('C1 on target_test_loader')
            logging.debug('C1 on target_test_loader')
            test(G, C1, config['target_test_loader'], epoch)
            print('C2 on target_test_loader')
            logging.debug('C2 on target_test_loader')
            test(G, C2, config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            draw_confusion_matrix(G, C1, config['target_test_loader'],
                                  res_dir, epoch, config['models'])
            draw_tsne(G, C1, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch,
                      config['models'], separate=True)
            draw_tsne(G, C1, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch,
                      config['models'], separate=False)
def train_dann_mm2(config):
    """Train a DANN model followed by an entropy step on target features.

    Each iteration performs two updates:

    1. A DANN update: source classification loss plus source/target domain
       losses computed by the critic on gradient-reversed features.
    2. An entropy update ("minmax"): the extractor and classifier are
       stepped on the entropy loss computed from target features.

    Args:
        config: dict of settings. Keys read here: 'network', 'n_flattens',
            'n_hiddens', 'bn', 'n_class', 'res_dir', 'snr', 'lr',
            'n_epochs', 'source_train_loader', 'target_train_loader',
            'target_test_loader', 'TEST_INTERVAL', 'VIS_INTERVAL', 'models'.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              bn=config['bn'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])
    critic = Critic2(n_flattens=config['n_flattens'],
                     n_hiddens=config['n_hiddens'])
    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()
        critic = critic.cuda()

    summary(extractor, (1, 5120))

    res_dir = os.path.join(
        config['res_dir'],
        'snr{}-lr{}'.format(config['snr'], config['lr']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    set_log_config(res_dir)
    logging.debug('train_dann_mm2')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(critic)
    logging.debug(config)

    criterion = torch.nn.CrossEntropyLoss()
    # One optimizer per sub-network so the entropy step below can update the
    # extractor and classifier without touching the critic.
    optimizer_e = optim.Adam(extractor.parameters(), lr=config['lr'])
    optimizer_cls = optim.Adam(classifier.parameters(), lr=config['lr'])
    optimizer_critic = optim.Adam(critic.parameters(), lr=config['lr'])

    def dann(input_data, alpha):
        """Return (class logits, domain logits, flattened feature)."""
        feature = extractor(input_data)
        feature = feature.view(feature.size(0), -1)
        # The critic sees the feature through the gradient reversal layer.
        reverse_feature = ReverseLayerF.apply(feature, alpha)
        class_output = classifier(feature)
        domain_output = critic(reverse_feature)
        return class_output, domain_output, feature

    def entropy(F1, feat, lamda, eta=1.0):
        """Return lamda-scaled entropy of softmax(F1(feat)).

        F1 is called with reverse=True and eta=-eta.
        """
        out_t1 = F1(feat, reverse=True, eta=-eta)
        out_t1 = F.softmax(out_t1, dim=1)
        loss_ent = -lamda * torch.mean(
            torch.sum(out_t1 * (torch.log(out_t1 + 1e-5)), 1))
        return loss_ent

    def adentropy(F1, feat, lamda, eta=1.0):
        """Return lamda-scaled negative entropy of softmax(F1(feat)).

        F1 is called with reverse=True and eta=eta. Currently unused; kept
        as the alternative to ``entropy`` for the second update step.
        """
        out_t1 = F1(feat, reverse=True, eta=eta)
        out_t1 = F.softmax(out_t1, dim=1)
        loss_adent = lamda * torch.mean(
            torch.sum(out_t1 * (torch.log(out_t1 + 1e-5)), 1))
        return loss_adent

    def entropy_softmax(output, lamda):
        """Entropy of already-normalised ``output`` (currently unused)."""
        loss_ent = -lamda * torch.mean(
            torch.sum(output * (torch.log(output + 1e-5)), 1))
        return loss_ent

    def adentropy_softmax(output, lamda):
        """Negative entropy of already-normalised ``output`` (unused)."""
        loss_adent = lamda * torch.mean(
            torch.sum(output * (torch.log(output + 1e-5)), 1))
        return loss_adent

    def train(extractor, classifier, critic, config, epoch):
        """Run one epoch; one iteration per source batch."""
        extractor.train()
        classifier.train()
        critic.train()

        # Gradient-reversal strength, ramped from 0 towards 1 over training.
        gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len(config['source_train_loader'])
        for i in range(1, num_iter + 1):
            # Built-in next() works on any iterator; the ``.next()`` method
            # is a Python 2 style alias that is not guaranteed to exist.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            # Restart the target loader once it has been exhausted so it can
            # be shorter than the source loader.
            if i % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()

            # ---- DANN update ------------------------------------------
            optimizer_e.zero_grad()
            optimizer_cls.zero_grad()
            optimizer_critic.zero_grad()

            class_output_s, domain_output, _ = dann(input_data=data_source,
                                                    alpha=gamma)
            err_s_label = criterion(class_output_s, label_source)
            # Domain labels live on the same device as the batch instead of
            # being forced onto CUDA (source = 0, target = 1).
            domain_label = torch.zeros(data_source.size(0),
                                       dtype=torch.long,
                                       device=data_source.device)
            err_s_domain = criterion(domain_output, domain_label)

            # Training model using target data
            domain_label = torch.ones(data_target.size(0),
                                      dtype=torch.long,
                                      device=data_target.device)
            class_output_t, domain_output, _ = dann(input_data=data_target,
                                                    alpha=gamma)
            err_t_domain = criterion(domain_output, domain_label)
            err = err_s_label + err_s_domain + err_t_domain

            if i % 100 == 0:
                print(
                    'err_s_label {:.2f}, err_s_domain {:.2f}, gamma {:.2f}, err_t_domain {:.2f}, total err {:.2f}'
                    .format(err_s_label.item(), err_s_domain.item(), gamma,
                            err_t_domain.item(), err.item()))

            err.backward()
            optimizer_e.step()
            optimizer_cls.step()
            optimizer_critic.step()

            # ---- entropy ("minmax") update on target features ----------
            optimizer_e.zero_grad()
            optimizer_cls.zero_grad()
            feature_t = extractor(data_target)
            feature_t = feature_t.view(feature_t.size(0), -1)
            entropy_loss = entropy(classifier, feature_t, 1)
            entropy_loss.backward()
            optimizer_e.step()
            optimizer_cls.step()

    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, critic, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], epoch)
        if epoch % config['VIS_INTERVAL'] == 0:
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, config['models'])
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch,
                      config['models'], separate=True)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch,
                      config['models'], separate=False)
def train_cdan_iw(config):
    """Train CDAN / DANN with per-sample importance weights on the source loss.

    Supported values of ``config['models']`` are 'CDAN_IW', 'CDAN_EIW' and
    'DANN_IW'; any other value raises ``ValueError`` in the first training
    step. After each test interval the extractor, classifier and adversarial
    network are saved to ``res_dir`` whenever target accuracy improves.

    Args:
        config: dict of settings. Keys read here: 'network', 'n_flattens',
            'n_hiddens', 'n_class', 'random_layer', 'res_dir', 'normal',
            'dilation', 'iw', 'lr', 'models', 'n_epochs',
            'source_train_loader', 'target_train_loader',
            'target_test_loader', 'TEST_INTERVAL', 'VIS_INTERVAL'.

    Raises:
        ValueError: if ``config['models']`` is not one of the supported
            methods.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    cdan_random = config['random_layer']

    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-dilation{}-iw{}-lr{}'.format(config['normal'],
                                                  config['network'],
                                                  config['dilation'],
                                                  config['iw'],
                                                  config['lr']))
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    print('train_cdan_iw')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # The adversarial network's input width depends on how features and
    # predictions are combined: raw features (DANN), a random projection, or
    # the full feature x class outer product.
    if config['models'] == 'DANN_IW':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'],
                                    config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer(
            [config['n_flattens'], config['n_class']], config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'],
                                    config['n_hiddens'])
        # Kept as a .cuda() call (not .to()) in case RandomLayer overrides
        # cuda() to move non-parameter state; only done when CUDA exists,
        # consistent with the extractor/classifier above.
        if use_cuda:
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(
            config['n_flattens'] * config['n_class'], config['n_hiddens'])
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([{
        'params': extractor.parameters(),
        'lr': config['lr']
    }, {
        'params': classifier.parameters(),
        'lr': config['lr']
    }], weight_decay=0.0001)
    optimizer_ad = torch.optim.Adam(ad_net.parameters(),
                                    lr=config['lr'],
                                    weight_decay=0.0001)
    print(ad_net)

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one epoch; one iteration per source batch."""
        start_epoch = 0

        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len(config['source_train_loader'])
        for step in range(1, num_iter + 1):
            # Built-in next() works on any iterator; the ``.next()`` method
            # is a Python 2 style alias that is not guaranteed to exist.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            # Restart the target loader once it has been exhausted.
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if torch.cuda.is_available():
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()

            # ---- importance weights for the source samples (no grad) ----
            # None means "no weighting"; this is the case for models that
            # have no weighting branch below (e.g. 'CDAN_EIW'), which
            # previously hit an undefined ``dom_weight``.
            dom_weight = None
            with torch.no_grad():
                if config['models'] == 'CDAN_IW':
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    # This target forward pass is unused here but is kept so
                    # the sequence of train-mode forward passes is unchanged.
                    h_t = extractor(data_target)
                    h_t = h_t.view(h_t.size(0), -1)

                    source_preds = classifier(h_s)
                    softmax_output_s = nn.Softmax(dim=1)(source_preds)

                    # Uniform "predictions" are used in place of the softmax
                    # output when conditioning the features.
                    weights = torch.ones_like(softmax_output_s).unsqueeze(2)
                    op_out = torch.bmm(weights, h_s.unsqueeze(1))
                    gamma = 1
                    # NOTE(review): this feeds the full outer product to
                    # ad_net even when a random layer is configured — confirm
                    # CDAN_IW is only used with random_layer disabled.
                    ad_out = ad_net(
                        op_out.view(-1,
                                    softmax_output_s.size(1) * h_s.size(1)),
                        gamma,
                        training=False)
                    dom_weight = 1 + (torch.abs(0.5 - ad_out))**config['iw']
                elif config['models'] == 'DANN_IW':
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    gamma = 1
                    ad_out = ad_net(h_s, gamma, training=False)
                    # Uniform weights, created on ad_out's device.
                    dom_weight = torch.ones_like(ad_out)

            # ---- main update ----------------------------------------------
            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)

            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t),
                                       0)

            cls_loss = nn.CrossEntropyLoss(reduction='none')(source_preds,
                                                             label_source)
            if dom_weight is None:
                cls_loss = torch.mean(cls_loss)
            else:
                # cls_loss is one value per sample. If the discriminator
                # returns a column (presumably (batch, 1) — TODO confirm),
                # multiplying directly would broadcast to batch x batch;
                # align the shapes so each sample gets its own weight.
                if dom_weight.numel() == cls_loss.numel():
                    dom_weight = dom_weight.reshape(cls_loss.shape)
                cls_loss = torch.mean(dom_weight * cls_loss)

            if epoch > start_epoch:
                gamma = 2 / (1 + math.exp(
                    -10 * (epoch) / config['n_epochs'])) - 1
                if config['models'] == 'CDAN_EIW':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(
                            num_iter * (epoch - start_epoch) + step),
                        random_layer)
                elif config['models'] == 'CDAN_IW':
                    d_loss = loss_func.CDAN([feature, softmax_output],
                                            ad_net, gamma, None, None,
                                            random_layer)
                elif config['models'] == 'DANN_IW':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            loss = cls_loss + d_loss
            loss.backward()
            optimizer.step()

            if epoch > start_epoch:
                optimizer_ad.step()
            if (step) % 20 == 0:
                # float() handles both a tensor loss and the plain 0 used
                # before start_epoch, where .item() would fail.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, Loss {:.6f}'.
                      format(epoch, cls_loss.item(), float(d_loss),
                             loss.item()))

    best_accuracy = 0
    best_model_index = -1
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, ad_net, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor, classifier,
                            config['target_test_loader'], epoch)

            # Checkpoint all three networks whenever target accuracy improves.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_index = epoch
                torch.save(extractor.state_dict(), extractor_path)
                torch.save(classifier.state_dict(), classifier_path)
                torch.save(ad_net.state_dict(), adnet_path)
            print(
                'epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.
                format(epoch, accuracy, best_accuracy, best_model_index))

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=True)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=False)