def train_cdan_vat(config):
    """Train or evaluate a domain-adversarial model with an extra VAT pass.

    Builds an ``Extractor``/``Classifier`` pair and an adversarial domain
    discriminator, then either trains them (``config['testonly'] == 0``) or
    loads previously saved weights and evaluates them.

    Keys read from ``config``: ``n_flattens``, ``n_hiddens``, ``n_class``,
    ``random_layer``, ``res_dir``, ``batch_size``, ``lr``, ``models``
    (one of ``'DANN'``, ``'CDAN'``, ``'CDAN-E'``, ``'CDAN_VAT'``),
    ``n_epochs``, ``testonly``, ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the
    loaders ``source_train_loader``, ``target_train_loader``,
    ``source_test_loader``, ``target_test_loader``.

    Raises:
        ValueError: if ``config['models']`` is not a recognised method.

    NOTE(review): another ``train_cdan_vat`` is defined later in this file;
    at import time that later definition replaces this one.
    NOTE(review): ``target_consistency_criterion`` and
    ``target_vat_loss_weight`` are not defined in this function and must be
    provided at module level.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    extractor = Extractor(n_flattens=config['n_flattens'],
                          n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])
    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    # Hyper-parameters of the virtual adversarial perturbation generator.
    xi = 1e-06
    ip = 1
    eps = 15
    vat = VirtualAdversarialPerturbationGenerator(extractor, classifier,
                                                  xi=xi, eps=eps, ip=ip)

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'random{}-bs{}-lr{}'.format(cdan_random, config['batch_size'],
                                    config['lr']))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(res_dir, exist_ok=True)

    print('train_cdan')
    print(extractor)
    print(classifier)
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # The discriminator input size depends on how features and predictions
    # are combined: plain features (DANN), a random projection, or the full
    # feature x class product.
    if config['models'] == 'DANN':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        if use_cuda:
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    # Guarded like the extractor/classifier so CPU-only hosts do not crash.
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']}
    ])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def _scalar(value):
        """Return a Python float for a loss that may be a tensor or a number."""
        return value.item() if torch.is_tensor(value) else float(value)

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one training epoch over the source loader."""
        start_epoch = 0

        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])

        num_iter = len_source_loader
        for step in range(1, num_iter + 1):
            # Built-in next(): loader iterators have no .next() on Python 3.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            # Restart the target loader once it has been fully consumed.
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target = data_target.cuda()

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)

            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if epoch > start_epoch:
                # Adversarial weight ramps from ~0 towards 1 over the epochs.
                gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch) + step),
                        random_layer)
                elif config['models'] == 'CDAN':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'DANN':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                elif config['models'] == 'CDAN_VAT':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            loss = cls_loss + d_loss
            loss.backward()
            optimizer.step()

            # VAT consistency pass on the target batch.
            # NOTE(review): the gradients produced below are never applied:
            # there is no optimizer.step() after vat_loss.backward(), and the
            # next iteration starts with zero_grad(). Left unchanged here
            # because the intended update schedule is not visible; confirm
            # whether the VAT loss should be folded into ``loss`` above.
            vat_adv, clean_vat_logits = vat(data_target)
            vat_adv_inputs = data_target + vat_adv
            adv_vat_features = extractor(vat_adv_inputs)
            # Flatten like the clean path so the classifier sees the same
            # layout (a no-op when the extractor output is already flat).
            adv_vat_features = adv_vat_features.view(adv_vat_features.size(0), -1)
            adv_vat_logits = classifier(adv_vat_features)
            target_vat_loss = target_consistency_criterion(adv_vat_logits,
                                                           clean_vat_logits)
            vat_loss = target_vat_loss_weight * target_vat_loss
            vat_loss.backward()

            if epoch > start_epoch:
                optimizer_ad.step()
            if (step) % 20 == 0:
                # _scalar() keeps logging safe when d_loss is the plain 0.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, vat_loss {:.6f}, Loss {:.6f}'.format(
                    epoch, cls_loss.item(), _scalar(d_loss), vat_loss.item(), loss.item()))

    if config['testonly'] == 0:
        best_accuracy = 0
        best_model_index = -1
        for epoch in range(1, config['n_epochs'] + 1):
            train(extractor, classifier, ad_net, config, epoch)
            if epoch % config['TEST_INTERVAL'] == 0:
                print('test on source_test_loader')
                test(extractor, classifier, config['source_test_loader'], epoch)
                print('test on target_test_loader')
                accuracy = test(extractor, classifier, config['target_test_loader'], epoch)

                # Checkpoint only when target accuracy improves.
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_model_index = epoch
                    torch.save(extractor.state_dict(), extractor_path)
                    torch.save(classifier.state_dict(), classifier_path)
                    torch.save(ad_net.state_dict(), adnet_path)
                print('epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.format(
                    epoch, accuracy, best_accuracy, best_model_index))

            if epoch % config['VIS_INTERVAL'] == 0:
                title = config['models']
                draw_confusion_matrix(extractor, classifier, config['target_test_loader'],
                                      res_dir, epoch, title)
                draw_tsne(extractor, classifier, config['source_test_loader'],
                          config['target_test_loader'], res_dir, epoch, title, separate=True)
    else:
        if os.path.exists(extractor_path) and os.path.exists(classifier_path) and os.path.exists(adnet_path):
            # map_location lets checkpoints saved on GPU load on CPU hosts.
            extractor.load_state_dict(torch.load(extractor_path, map_location=device))
            classifier.load_state_dict(torch.load(classifier_path, map_location=device))
            ad_net.load_state_dict(torch.load(adnet_path, map_location=device))
            print('Test only mode, model loaded')

            print('test on source_test_loader')
            test(extractor, classifier, config['source_test_loader'], -1)
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], -1)

            title = config['models']
            draw_confusion_matrix(extractor, classifier, config['target_test_loader'],
                                  res_dir, -1, title)
            draw_tsne(extractor, classifier, config['source_test_loader'],
                      config['target_test_loader'], res_dir, -1, title, separate=True)
        else:
            print('no saved model found')
def train_pada(config):
    """Two-stage training: source-only pre-training, then adversarial adaptation.

    Stage 1 trains ``extractor_s``/``classifier_s`` on the labeled source
    loader for ``config['n_epochs']`` epochs. Their weights are then copied
    into ``extractor_t``/``classifier_t`` and the source models are frozen.
    Stage 2 trains the target models adversarially against ``ad_net`` with an
    extra L1 constraint that keeps the target extractor's source features
    close to those of the frozen source extractor.

    Keys read from ``config``: ``network``, ``n_flattens``, ``n_hiddens``,
    ``n_class``, ``random_layer``, ``res_dir``, ``normal``, ``pada_cons_w``,
    ``lr``, ``models`` (one of ``'PADA'``, ``'CDAN'``, ``'CDAN-E'``),
    ``target_labeling``, ``n_epochs``, ``TEST_INTERVAL``, ``VIS_INTERVAL``
    and the loaders ``source_train_loader``, ``target_train_loader``,
    ``source_test_loader``, ``target_test_loader``.

    Raises:
        ValueError: if ``config['models']`` is not a recognised method.
    """
    use_cuda = torch.cuda.is_available()

    if config['network'] == 'inceptionv1':
        extractor_s = InceptionV1(num_classes=32)
        extractor_t = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor_s = InceptionV1s(num_classes=32)
        extractor_t = InceptionV1s(num_classes=32)
    else:
        extractor_s = Extractor(n_flattens=config['n_flattens'],
                                n_hiddens=config['n_hiddens'])
        extractor_t = Extractor(n_flattens=config['n_flattens'],
                                n_hiddens=config['n_hiddens'])
    classifier_s = Classifier(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              n_class=config['n_class'])
    classifier_t = Classifier(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              n_class=config['n_class'])

    if use_cuda:
        extractor_s = extractor_s.cuda()
        classifier_s = classifier_s.cuda()
        extractor_t = extractor_t.cuda()
        classifier_t = classifier_t.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-cons{}-lr{}'.format(config['normal'], config['network'],
                                         config['pada_cons_w'], config['lr']))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(res_dir, exist_ok=True)

    print('train_pada')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_pada')
    logging.debug(config)

    if config['models'] == 'PADA':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        if use_cuda:
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    # Guarded like the other models so CPU-only hosts do not crash.
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer_s = torch.optim.Adam([
        {'params': extractor_s.parameters(), 'lr': config['lr']},
        {'params': classifier_s.parameters(), 'lr': config['lr']}
    ])
    optimizer_t = torch.optim.Adam([
        {'params': extractor_t.parameters(), 'lr': config['lr']},
        {'params': classifier_t.parameters(), 'lr': config['lr']}
    ])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    def _scalar(value):
        """Return a Python float for a loss that may be a tensor or a number."""
        return value.item() if torch.is_tensor(value) else float(value)

    def train_stage1(extractor_s, classifier_s, config, epoch):
        """Stage 1: one epoch of supervised training on the labeled source.

        ``epoch`` is accepted for symmetry with ``train`` but is not used.
        """
        extractor_s.train()
        classifier_s.train()

        iter_source = iter(config['source_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        for step in range(1, len_source_loader + 1):
            # Built-in next(): loader iterators have no .next() on Python 3.
            data_source, label_source = next(iter_source)
            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()

            optimizer_s.zero_grad()

            h_s = extractor_s(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            source_preds = classifier_s(h_s)
            cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)

            cls_loss.backward()
            optimizer_s.step()

    def train(extractor_s, classifier_s, extractor_t, classifier_t, ad_net,
              config, epoch):
        """Stage 2: one epoch of adversarial training of the target models.

        ``extractor_s`` is only used as a fixed reference for the constraint
        loss; ``classifier_s`` is accepted but not used here.
        """
        start_epoch = 0

        extractor_t.train()
        classifier_t.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])

        num_iter = len_source_loader
        for step in range(1, num_iter + 1):
            data_source, label_source = next(iter_source)
            data_target, label_target = next(iter_target)
            # Restart the target loader once it has been fully consumed.
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target, label_target = data_target.cuda(), label_target.cuda()

            optimizer_t.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor_t(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor_t(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier_t(h_s)
            cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)

            target_preds = classifier_t(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)
            # Optionally supervise with target labels as well.
            if config['target_labeling'] == 1:
                cls_loss += nn.CrossEntropyLoss()(target_preds, label_target)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if epoch > start_epoch:
                # Adversarial weight ramps from ~0 towards 1 over the epochs.
                gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch) + step),
                        random_layer)
                elif config['models'] == 'CDAN':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'PADA':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            # Constraint loss: keep the target extractor's source features
            # close to those of the frozen stage-1 extractor. The reference
            # features are flattened exactly like h_s so both operands of the
            # L1 loss have the same shape, and are computed without building
            # a graph because extractor_s is frozen.
            # NOTE(review): extractor_s keeps whatever train/eval mode it was
            # left in after stage 1; confirm which mode is intended.
            with torch.no_grad():
                h_s_prev = extractor_s(data_source)
                h_s_prev = h_s_prev.view(h_s_prev.size(0), -1)
            cons_loss = nn.L1Loss()(h_s, h_s_prev)

            loss = cls_loss + d_loss + config['pada_cons_w'] * cons_loss
            loss.backward()
            optimizer_t.step()

            if epoch > start_epoch:
                optimizer_ad.step()
            if (step) % 20 == 0:
                # _scalar() keeps logging safe when d_loss is the plain 0.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, cons_loss {:.6f}, Loss {:.6f}'.format(
                    epoch, cls_loss.item(), _scalar(d_loss), cons_loss.item(), loss.item()))

    # ---- Stage 1: supervised pre-training on the source domain ----
    for epoch in range(1, config['n_epochs'] + 1):
        train_stage1(extractor_s, classifier_s, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on source_test_loader')
            test(extractor_s, classifier_s, config['source_test_loader'], epoch)

    # Initialise the target models from the trained source models, then
    # freeze the source models so they act as a fixed reference.
    extractor_t.load_state_dict(extractor_s.state_dict())
    classifier_t.load_state_dict(classifier_s.state_dict())
    for param in extractor_s.parameters():
        param.requires_grad = False
    for param in classifier_s.parameters():
        param.requires_grad = False

    # ---- Stage 2: adversarial adaptation of the target models ----
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor_s, classifier_s, extractor_t, classifier_t, ad_net,
              config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            test(extractor_t, classifier_t, config['target_test_loader'], epoch)

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor_t, classifier_t,
                                  config['target_test_loader'], res_dir, epoch, title)
            draw_tsne(extractor_t, classifier_t, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title, separate=True)
def train_cdan(config):
    """Train or evaluate a DANN/CDAN model with VAT and BNM regularisers.

    Builds the feature extractor (chosen by ``config['network']``), the
    classifier and the adversarial domain discriminator, then either trains
    them (``config['testonly'] == 0``) or loads saved weights and evaluates.
    The training loss is classification + domain loss + VAT losses on source
    and target + ``get_loss_bnm`` on the target predictions, plus an optional
    supervised term on a labeled target subset when ``config['slim'] > 0``.

    Keys read from ``config``: ``network``, ``n_flattens``, ``n_hiddens``,
    ``bn``, ``n_class``, ``random_layer``, ``res_dir``, ``slim``,
    ``target_labeling``, ``snr``, ``snrp``, ``lr``, ``models`` (one of
    ``'DANN'``, ``'CDAN'``, ``'CDAN-E'``), ``n_epochs``, ``testonly``,
    ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the loaders
    ``source_train_loader``, ``target_train_loader``,
    ``target_train_semi_loader`` (only when ``slim > 0``) and
    ``target_test_loader``.

    Raises:
        ValueError: if ``config['models']`` is not a recognised method.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'], bn=config['bn'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    vat_loss = VAT(extractor, classifier, n_power=1, radius=3.5)
    # Every CUDA move is guarded so the function also runs on CPU-only hosts.
    if use_cuda:
        vat_loss = vat_loss.cuda()
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'slim{}-targetLabel{}-snr{}-snrp{}-lr{}'.format(
            config['slim'], config['target_labeling'], config['snr'],
            config['snrp'], config['lr']))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(res_dir, exist_ok=True)

    print('train_cdan')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    if config['models'] == 'DANN':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        if use_cuda:
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']}
    ])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def _scalar(value):
        """Return a Python float for a loss that may be a tensor or a number."""
        return value.item() if torch.is_tensor(value) else float(value)

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one training epoch over the source loader."""
        start_epoch = 0

        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        use_semi = config['slim'] > 0
        if use_semi:
            iter_target_semi = iter(config['target_train_semi_loader'])
            len_target_semi_loader = len(config['target_train_semi_loader'])

        num_iter = len_source_loader
        for step in range(1, num_iter + 1):
            # Built-in next(): loader iterators have no .next() on Python 3.
            data_source, label_source = next(iter_source)
            data_target, label_target = next(iter_target)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                # Restart the labeled-target loader once it is exhausted.
                if step % len_target_semi_loader == 0:
                    iter_target_semi = iter(config['target_train_semi_loader'])
            # Restart the target loader once it has been fully consumed.
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target, label_target = data_target.cuda(), label_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)

            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)
            # Optionally supervise with target labels as well.
            if config['target_labeling'] == 1:
                cls_loss += nn.CrossEntropyLoss()(target_preds, label_target)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if epoch > start_epoch:
                # Adversarial weight ramps from ~0 towards 1 over the epochs.
                gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch) + step),
                        random_layer)
                elif config['models'] == 'CDAN':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'DANN':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            loss = cls_loss + d_loss

            # Regularisers: BNM on target predictions, VAT on both domains.
            err_t_bnm = get_loss_bnm(target_preds)
            err_s_vat = vat_loss(data_source, source_preds)
            err_t_vat = vat_loss(data_target, target_preds)
            loss += 1.0 * err_s_vat + 1.0 * err_t_vat + 1.0 * err_t_bnm

            # Supervised loss on the small labeled target subset.
            if use_semi:
                feature_target_semi = extractor(data_target_semi)
                feature_target_semi = feature_target_semi.view(
                    feature_target_semi.size(0), -1)
                preds_target_semi = classifier(feature_target_semi)
                loss += nn.CrossEntropyLoss()(preds_target_semi, label_target_semi)

            loss.backward()
            optimizer.step()

            if epoch > start_epoch:
                optimizer_ad.step()
            if (step) % 100 == 0:
                # _scalar() keeps logging safe when d_loss is the plain 0.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, Loss {:.6f}'.format(
                    epoch, cls_loss.item(), _scalar(d_loss), loss.item()))

    if config['testonly'] == 0:
        best_accuracy = 0
        best_model_index = -1
        for epoch in range(1, config['n_epochs'] + 1):
            train(extractor, classifier, ad_net, config, epoch)
            if epoch % config['TEST_INTERVAL'] == 0:
                print('test on target_test_loader')
                accuracy = test(extractor, classifier, config['target_test_loader'], epoch)

                # Checkpoint only when target accuracy improves.
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_model_index = epoch
                    torch.save(extractor.state_dict(), extractor_path)
                    torch.save(classifier.state_dict(), classifier_path)
                    torch.save(ad_net.state_dict(), adnet_path)
                print('epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.format(
                    epoch, accuracy, best_accuracy, best_model_index))

            if epoch % config['VIS_INTERVAL'] == 0:
                title = config['models']
                draw_confusion_matrix(extractor, classifier, config['target_test_loader'],
                                      res_dir, epoch, title)
                draw_tsne(extractor, classifier, config['source_train_loader'],
                          config['target_test_loader'], res_dir, epoch, title, separate=True)
    else:
        if os.path.exists(extractor_path) and os.path.exists(classifier_path) and os.path.exists(adnet_path):
            # map_location lets checkpoints saved on GPU load on CPU hosts.
            extractor.load_state_dict(torch.load(extractor_path, map_location=device))
            classifier.load_state_dict(torch.load(classifier_path, map_location=device))
            ad_net.load_state_dict(torch.load(adnet_path, map_location=device))
            print('Test only mode, model loaded')

            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], -1)

            title = config['models']
            draw_confusion_matrix(extractor, classifier, config['target_test_loader'],
                                  res_dir, -1, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, -1, title, separate=True)
        else:
            print('no saved model found')
def train_tcl_vat(config):
    """Train a domain-adversarial model with sample-weighted source losses.

    For epochs before ``config['startiter']`` the model is trained with a
    plain cross-entropy source loss and an unweighted binary domain loss.
    From ``startiter`` on, each source sample gets a 0/1 weight (see
    ``cal_Ly``) that is applied to both the classification loss and the
    domain loss. An entropy term on the target predictions is always added.
    The best checkpoint by target accuracy is saved under the result
    directory.

    Keys read from ``config``: ``network``, ``n_flattens``, ``n_hiddens``,
    ``n_class``, ``res_dir``, ``slim``, ``snr``, ``snrp``, ``Lythred``,
    ``Ldthred``, ``lambdad``, ``lr``, ``startiter``, ``traded``, ``tradet``,
    ``models``, ``n_epochs``, ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the
    loaders ``source_train_loader``, ``target_train_loader``,
    ``target_train_semi_loader`` (only when ``slim > 0``) and
    ``target_test_loader``.
    """
    use_cuda = torch.cuda.is_available()

    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    res_dir = os.path.join(
        config['res_dir'],
        'slim{}-snr{}-snrp{}-Lythred{}-Ldthred{}-lambdad{}-lr{}'.format(
            config['slim'], config['snr'], config['snrp'], config['Lythred'],
            config['Ldthred'], config['lambdad'], config['lr']))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(res_dir, exist_ok=True)

    print('train_tcl')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_tcl')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    # Guarded like the extractor/classifier so CPU-only hosts do not crash.
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']}
    ], weight_decay=0.0001)
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'],
                                    weight_decay=0.0001)
    print(ad_net)

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def cal_Ly(source_y_softmax, source_d, label):
        """Compute the sample-weighted source classification loss.

        Args:
            source_y_softmax: class predictions for the source batch, after
                softmax.
            source_d: domain predictions for the source batch.
            label: ground-truth class labels of the source batch.

        Returns:
            Tuple ``(Ly, source_weight, source_num)``: the mean weighted
            loss, the per-sample 0/1 weights, and the number of samples
            with weight 1 as a float.
        """
        # Threshold on the combined per-sample loss; samples at or above it
        # get weight 0.
        agey = -math.log(config['Lythred'])
        aged = -math.log(1.0 - config['Ldthred'])
        age = agey + config['lambdad'] * aged

        # Use the actual batch size rather than config['batch_size'] so a
        # short final batch does not break the indexing below.
        n_samples = source_y_softmax.size(0)
        the_index = torch.arange(n_samples, device=source_y_softmax.device)

        # For each sample take the softmax probability of its true class;
        # -log of it is the per-sample cross-entropy.
        y_label = source_y_softmax[the_index, label]
        y_loss = -torch.log(y_label + 1e-8)

        d_loss = -torch.log(1.0 - source_d)
        d_loss = d_loss.view(-1)

        weight_loss = y_loss + config['lambdad'] * d_loss

        # Weights are detached: they select samples and carry no gradient.
        weight_var = (weight_loss < age).float().detach()
        Ly = torch.mean(y_loss * weight_var)

        source_weight = weight_var.data.clone()
        source_num = float(torch.sum(source_weight))
        return Ly, source_weight, source_num

    def cal_Lt(target_y_softmax):
        """Return the mean entropy of the target softmax predictions."""
        Gt_en = -torch.sum(target_y_softmax * torch.log(target_y_softmax + 1e-8), 1)
        return torch.mean(Gt_en)

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one training epoch over the source loader."""
        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_source_loader = len(config['source_train_loader'])
        len_target_loader = len(config['target_train_loader'])

        num_iter = len_source_loader
        use_semi = config['slim'] > 0
        if use_semi:
            iter_target_semi = iter(config['target_train_semi_loader'])
            len_target_semi_loader = len(config['target_train_semi_loader'])

        for step in range(1, num_iter + 1):
            # Built-in next(): loader iterators have no .next() on Python 3.
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            if use_semi:
                data_target_semi, label_target_semi = next(iter_target_semi)
                # Restart the labeled-target loader once it is exhausted.
                if step % len_target_semi_loader == 0:
                    iter_target_semi = iter(config['target_train_semi_loader'])
            # Restart the target loader once it has been fully consumed.
            if step % len_target_loader == 0:
                iter_target = iter(config['target_train_loader'])
            if use_cuda:
                data_source, label_source = data_source.cuda(), label_source.cuda()
                data_target = data_target.cuda()
                if use_semi:
                    data_target_semi = data_target_semi.cuda()
                    label_target_semi = label_target_semi.cuda()

            # Actual batch sizes; the last batch of a loader may be shorter
            # than config['batch_size'].
            n_source = data_source.size(0)
            n_target = data_target.size(0)

            # Source and target go through the networks as one batch.
            inputs = torch.cat([data_source, data_target], 0)
            features = extractor(inputs)
            gamma = 2 / (1 + math.exp(-10 * (epoch) / config['n_epochs'])) - 1
            y_var = classifier(features)
            features = features.view(features.size(0), -1)
            d_var = ad_net(features, gamma)
            y_softmax_var = nn.Softmax(dim=1)(y_var)

            # Split by the real source count instead of chunk(2), which
            # assumes both halves have the same size.
            source_y = y_var[:n_source]
            source_y_softmax = y_softmax_var[:n_source]
            target_y_softmax = y_softmax_var[n_source:]
            source_d = d_var[:n_source]

            # Domain labels: 1 for source samples, 0 for target samples.
            domain_label = torch.cat(
                [torch.ones(n_source, 1), torch.zeros(n_target, 1)],
                0).to(d_var.device)

            if epoch < config['startiter']:
                # Warm-up: plain classification and domain losses, no weights.
                Ly = nn.CrossEntropyLoss()(source_y, label_source)
                Ld = nn.BCELoss()(d_var, domain_label)
            else:
                Ly, source_weight, _ = cal_Ly(source_y_softmax, source_d,
                                              label_source)
                # Target samples always keep weight 1 in the domain loss.
                target_weight = torch.ones(n_target, device=source_weight.device)
                domain_weight = torch.cat([source_weight, target_weight], 0)
                domain_weight = domain_weight.view(-1, 1)
                Ld = nn.BCELoss(weight=domain_weight)(d_var, domain_label)

            # Entropy of the target class predictions.
            Lt = cal_Lt(target_y_softmax)

            loss = Ly + config['traded'] * Ld + config['tradet'] * Lt

            # Supervised loss on the small labeled target subset.
            if use_semi:
                feature_target_semi = extractor(data_target_semi)
                feature_target_semi = feature_target_semi.view(
                    feature_target_semi.size(0), -1)
                preds_target_semi = classifier(feature_target_semi)
                loss += nn.CrossEntropyLoss()(preds_target_semi, label_target_semi)

            optimizer.zero_grad()
            optimizer_ad.zero_grad()
            loss.backward()
            optimizer.step()
            optimizer_ad.step()

    best_accuracy = 0
    best_model_index = -1
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, ad_net, config, epoch)
        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor, classifier, config['target_test_loader'], epoch)

            # Checkpoint only when target accuracy improves.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_index = epoch
                torch.save(extractor.state_dict(), extractor_path)
                torch.save(classifier.state_dict(), classifier_path)
                torch.save(ad_net.state_dict(), adnet_path)
            print('epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.format(
                epoch, accuracy, best_accuracy, best_model_index))

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier, config['target_test_loader'],
                                  res_dir, epoch, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title, separate=True)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title, separate=False)
def train_cdan_vat(config):
    """Train a CDAN/DANN model with entropy and virtual-adversarial losses.

    Builds the extractor, classifier, adversarial network and VAT loss from
    ``config``, then runs ``config['n_epochs']`` epochs of training with
    periodic evaluation on the target test loader and periodic plotting.

    Args:
        config: dict of settings. Keys read here: ``network``, ``n_flattens``,
            ``n_hiddens``, ``bn``, ``n_class``, ``random_layer``, ``res_dir``,
            ``normal``, ``dilation``, ``lr``, ``models`` (``'CDAN-E'``,
            ``'CDAN_VAT'`` or ``'DANN_VAT'``), ``iw``, ``target_labeling``,
            ``n_epochs``, ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the
            ``source_train_loader`` / ``target_train_loader`` /
            ``source_test_loader`` / ``target_test_loader`` data loaders.

    Raises:
        ValueError: if ``config['models']`` is not a recognised method.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'],
                              bn=config['bn'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-dilation{}-lr{}'.format(config['normal'],
                                             config['network'],
                                             config['dilation'],
                                             config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    print('train_cdan_vat')
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan_vat')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # The discriminator input width depends on the adaptation method.
    if config['models'] == 'DANN_VAT':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        if use_cuda:
            # Called as a statement: the return value is not relied upon.
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']},
    ])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])

    vat_loss = VAT(extractor, classifier, n_power=1, radius=3.5)
    if use_cuda:
        vat_loss = vat_loss.cuda()

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one epoch over the source loader, cycling the target loader."""
        start_epoch = 0
        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len(config['source_train_loader'])

        # The adversarial schedule depends only on the epoch.
        gamma = 2 / (1 + math.exp(-10 * epoch / config['n_epochs'])) - 1

        for step in range(1, num_iter + 1):
            data_source, label_source = next(iter_source)
            data_target, label_target = next(iter_target)
            if step % len_target_loader == 0:
                # Target loader exhausted: restart it for the next step.
                iter_target = iter(config['target_train_loader'])
            if use_cuda:
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()
                label_target = label_target.cuda()

            # Per-sample source weights from the current discriminator output.
            with torch.no_grad():
                if 'CDAN' in config['models']:
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    source_preds = classifier(h_s)
                    softmax_output_s = nn.Softmax(dim=1)(source_preds)
                    op_out = torch.bmm(softmax_output_s.unsqueeze(2),
                                       h_s.unsqueeze(1))
                    ad_out = ad_net(
                        op_out.view(-1,
                                    softmax_output_s.size(1) * h_s.size(1)),
                        gamma, training=False)
                elif 'DANN' in config['models']:
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    ad_out = ad_net(h_s, gamma, training=False)
                else:
                    raise ValueError('Method cannot be recognized.')
                dom_weight = 1 - torch.abs(0.5 - ad_out) ** config['iw']
                # Flatten so the weights pair element-wise with the (N,)
                # per-sample loss instead of broadcasting to an N x N matrix.
                dom_weight = dom_weight.view(-1)

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)
            if config['iw'] > 0:
                per_sample_loss = nn.CrossEntropyLoss(reduction='none')(
                    source_preds, label_source)
                cls_loss = torch.mean(dom_weight * per_sample_loss)
            else:
                cls_loss = nn.CrossEntropyLoss()(source_preds, label_source)

            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)
            if config['target_labeling'] == 1:
                cls_loss += nn.CrossEntropyLoss()(target_preds, label_target)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            if epoch > start_epoch:
                if config['models'] == 'CDAN-E':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch)
                                             + step),
                        random_layer)
                elif config['models'] == 'CDAN_VAT':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'DANN_VAT':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            # Target entropy loss.
            err_t_entropy = get_loss_entropy(softmax_output_t)
            # Virtual adversarial losses on both domains.
            err_s_vat = vat_loss(data_source, source_preds)
            err_t_vat = vat_loss(data_target, target_preds)

            loss = cls_loss + d_loss + err_t_entropy + err_s_vat + err_t_vat
            loss.backward()
            optimizer.step()
            if epoch > start_epoch:
                optimizer_ad.step()

            if step % 20 == 0:
                # float() accepts both a loss tensor and the integer zero.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, Loss {:.6f}'.
                      format(epoch, cls_loss.item(), float(d_loss),
                             loss.item()))

    best_accuracy = 0
    best_model_index = -1
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, ad_net, config, epoch)

        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor, classifier,
                            config['target_test_loader'], epoch)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_index = epoch
            print(
                'epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.
                format(epoch, accuracy, best_accuracy, best_model_index))

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=True)
            # NOTE(review): this plot reads source_test_loader while the one
            # above reads source_train_loader -- confirm this is intended.
            draw_tsne(extractor, classifier, config['source_test_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=False)
def train_cdan_ican(config):
    """Train with pseudo-label selection (ICAN-style) on top of a domain critic.

    Every epoch: evaluate on the source test loader, select confident target
    samples as a pseudo-labelled set, update ``disc_activate`` on that set,
    rebuild the batch lists, and run one adversarial training pass.

    Args:
        config: dict of settings. Keys read here: ``batch_size``,
            ``n_flattens``, ``n_hiddens``, ``n_class``, ``random_layer``,
            ``models``, ``lr``, ``res_dir``, ``n_epochs``, ``TEST_INTERVAL``,
            ``VIS_INTERVAL`` and the ``source_train_loader`` /
            ``target_train_loader`` / ``source_test_loader`` /
            ``target_test_loader`` data loaders.
    """
    BATCH_SIZE = config['batch_size']

    extractor = Extractor(n_flattens=config['n_flattens'],
                          n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])
    disc_activate = Contrast_ReLU_activate(INI_DISC_WEIGHT_SCALE,
                                           INI_DISC_BIAS)

    cdan_random = config['random_layer']
    # NOTE(review): random_layer is built here but the ICAN loop below never
    # consumes it.
    if config['models'] == 'DANN':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])

    if torch.cuda.is_available():
        extractor = extractor.cuda()
        classifier = classifier.cuda()
        disc_activate = disc_activate.cuda()
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']},
    ])
    optimizer_ad = torch.optim.Adam(ad_net.parameters(), lr=config['lr'])
    pseudo_optimizer = torch.optim.Adam(disc_activate.parameters(),
                                        lr=config['lr'])

    res_dir = os.path.join(
        config['res_dir'],
        'random{}-bs{}-lr{}'.format(cdan_random, config['batch_size'],
                                    config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    print('train_cdan_ican')
    print(extractor)
    print(classifier)
    print(ad_net)
    print(config)

    set_log_config(res_dir)
    logging.debug('train_cdan_ican')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(ad_net)
    logging.debug(config)

    def select_samples_ican(extractor, classifier, ad_net, disc_activate,
                            config, epoch, epoch_acc_s):
        """Collect confident target-train samples as a pseudo-labelled set.

        Each returned item is ``(input, (predicted class, domain weight))``.
        """
        set_training_mode(extractor, False)
        set_training_mode(classifier, False)
        set_training_mode(ad_net, False)
        set_training_mode(disc_activate, False)

        Pseudo_set = []
        # The confidence bar rises with the current source accuracy.
        confid_threshold = 1 / (1 + np.exp(-2.4 * epoch_acc_s))
        threshold = float(confid_threshold)

        # Selection only reads the networks, so no autograd graph is needed.
        with torch.no_grad():
            for target_inputs, target_labels in iter(
                    config['target_train_loader']):
                target_inputs = target_inputs.cuda()
                # NOTE: the original author remarks that the paper labels the
                # target domain 1; this code uses 0.
                domain_labels_t = torch.FloatTensor(
                    [0.] * len(target_inputs)).cuda()

                embeddings = extractor(target_inputs)
                class_t = classifier(embeddings)
                domain_out_t = ad_net(embeddings, training=False)
                disc_weight_t, w_t, b_t = disc_activate(domain_out_t,
                                                        domain_labels_t)
                top_prob, preds_t = torch.max(class_t, 1)

                for i, weight in enumerate(disc_weight_t):
                    if weight > b_t and top_prob[i] >= threshold:
                        Pseudo_set.append(
                            (target_inputs[i].cpu(),
                             (preds_t[i].cpu(), float(weight))))

        print(
            'Epoch {}, Stage Select_Sample, accuracy {}, confident threshold {}, pseudo number {}, b_t {}'
            .format(epoch, epoch_acc_s, confid_threshold, len(Pseudo_set),
                    b_t))
        draw_dict['confid_threshold_point'].append(
            float("%.4f" % confid_threshold))
        return Pseudo_set

    # TODO: selection and this update could be merged into a single step.
    def update_ican(extractor, classifier, ad_net, disc_activate, config,
                    Pseudo_set, epoch):
        """Fit ``disc_activate`` on the pseudo set; other nets stay frozen."""
        if len(Pseudo_set) == 0:
            return

        set_training_mode(extractor, False)
        set_training_mode(classifier, False)
        set_training_mode(ad_net, False)
        set_training_mode(disc_activate, True)

        pseudo_batch_count = 0
        pseudo_sample_count = 0
        pseudo_epoch_loss = 0.0
        pseudo_epoch_corrects = 0

        # Half a batch per step; never 0, which DataLoader rejects.
        pseudo_loader = torch.utils.data.DataLoader(
            Pseudo_set, batch_size=max(1, BATCH_SIZE // 2), shuffle=True)

        for pseudo_inputs, pseudo_labels in pseudo_loader:
            pseudo_batch_count += 1
            pseudo_sample_count += len(pseudo_inputs)

            pseudo_labels, pseudo_weights = pseudo_labels[0], pseudo_labels[1]
            pseudo_inputs, pseudo_labels = (pseudo_inputs.cuda(),
                                            pseudo_labels.cuda())
            domain_labels = torch.FloatTensor(
                [0.] * len(pseudo_inputs)).cuda()

            embeddings = extractor(pseudo_inputs)
            pseudo_class = classifier(embeddings)
            pseudo_domain_out = ad_net(embeddings, training=False)
            pseudo_disc_weight, pseudo_ww, pseudo_bb = disc_activate(
                pseudo_domain_out, domain_labels)

            pseudo_optimizer.zero_grad()

            # TODO: the loss uses freshly computed predictions rather than the
            # labels stored at selection time; the original author questions
            # which is right. TODO: check the shape of pseudo_disc_weight.
            _, pseudo_preds = torch.max(pseudo_class, 1)
            pseudo_class_loss = compute_new_loss(pseudo_class, pseudo_preds,
                                                 pseudo_disc_weight)
            pseudo_epoch_loss += float(pseudo_class_loss)

            # Not a meaningful accuracy: both sides are argmax predictions of
            # the classifier on the same inputs.
            pseudo_epoch_corrects += int(
                torch.sum(pseudo_preds.squeeze() == pseudo_labels.squeeze()))

            pseudo_class_loss.backward()
            pseudo_optimizer.step()

        epoch_discrim_lambda = 1.0 / (abs(pseudo_ww)**(1. / 4))
        epoch_discrim_bias = pseudo_bb
        pseudo_avg_loss = pseudo_epoch_loss / pseudo_batch_count
        pseudo_epoch_acc = pseudo_epoch_corrects / pseudo_sample_count
        print(
            'Epoch {}, Phase: {}, Loss: {:.4f} Acc: {:.4f} Disc_Lam: {:.6f} Disc_bias: {:.4f} '
            .format(epoch, 'Pseudo_train', pseudo_avg_loss, pseudo_epoch_acc,
                    epoch_discrim_lambda, epoch_discrim_bias))

    def prepare_dataset(epoch, pseudo_set):
        """Build the loaders and materialised batch lists for one epoch.

        Returns ``(dset_loaders, target_dict, pseudo_dict,
        pseudo_source_dict, source_batchsize, pseudo_batchsize)``.
        """
        dset_loaders = {}
        dset_loaders['source'] = config['source_train_loader']
        source_size = len(config['source_train_loader'])
        pseudo_size = len(pseudo_set)
        source_batchsize = BATCH_SIZE

        if pseudo_size == 0:
            dset_loaders['pseudo'] = []
            dset_loaders['pseudo_source'] = []
            pseudo_batchsize = 0
        else:
            # Spread the pseudo set over the source batches of one epoch.
            # Clamp to 1: a pseudo set smaller than the number of source
            # batches would otherwise give batch_size=0 and make DataLoader
            # raise.
            pseudo_batchsize = max(1, pseudo_size // source_size)
            dset_loaders['pseudo'] = torch.utils.data.DataLoader(
                pseudo_set,
                batch_size=pseudo_batchsize,
                shuffle=True,
                drop_last=False)
            dset_loaders['pseudo_source'] = config['source_train_loader']

        print(
            'Epoch {}, Stage prepare_dataset, pseudo_size {}, num batch each epoch: {}, pseudo_batchsize {}'
            .format(epoch, pseudo_size, source_size, pseudo_batchsize))

        target_dict = [(i, j) for (i, j) in config['target_train_loader']]
        if pseudo_size > 0:
            pseudo_dict = [(i, j) for (i, j) in dset_loaders['pseudo']]
            pseudo_source_dict = [(i, j)
                                  for (i, j) in dset_loaders['pseudo_source']]
        else:
            pseudo_dict = []
            pseudo_source_dict = []

        return (dset_loaders, target_dict, pseudo_dict, pseudo_source_dict,
                source_batchsize, pseudo_batchsize)

    def do_training(dset_loaders, target_dict, source_batchsize,
                    pseudo_batchsize, pseudo_dict, pseudo_source_dict):
        """Run one adversarial pass over the source loader.

        Reads ``epoch`` from the enclosing epoch loop.
        """
        target_pointer = 0
        pseudo_pointer = 0
        pseudo_source_pointer = 0
        # Epochs up to this index use the unfiltered pretraining branch.
        pre_epochs = 0

        set_training_mode(extractor, True)
        set_training_mode(classifier, True)
        set_training_mode(ad_net, True)
        set_training_mode(disc_activate, False)

        for data in dset_loaders['source']:
            inputs, labels = data

            # Re-materialise the target batches once the pointer runs out.
            if target_pointer >= len(target_dict) - 1:
                target_pointer = 0
                target_dict = [(i, j)
                               for (i, j) in config['target_train_loader']]
            target_inputs = target_dict[target_pointer][0]

            if epoch <= pre_epochs:
                # Pretraining: all source and all (unfiltered) target samples.
                domain_inputs = torch.cat((inputs, target_inputs), 0)
                domain_labels = torch.FloatTensor(
                    [1.] * inputs.size(0) + [0.] * target_inputs.size(0))
                domain_inputs, domain_labels = (domain_inputs.cuda(),
                                                domain_labels.cuda())
                inputs, labels = inputs.cuda(), labels.cuda()

                # Class predictions on source; domain predictions on both.
                class_outputs = classifier(extractor(inputs))
                domain_outputs = ad_net(extractor(domain_inputs)).squeeze()
                target_pointer += 1

                criterion = nn.CrossEntropyLoss()
                class_loss = criterion(class_outputs, labels)
            else:
                # The target side uses the filtered pseudo set.
                pseudo_weights = torch.FloatTensor([])
                pseudo_size = len(pseudo_dict)

                # Reset exhausted pointers.
                if (pseudo_pointer >= len(pseudo_dict) - 1) and (
                        len(pseudo_dict) != 0):
                    pseudo_pointer = 0
                    pseudo_dict = [(i, j) for (i, j) in dset_loaders['pseudo']]
                if (pseudo_source_pointer >= len(pseudo_source_dict) - 1) and (
                        len(pseudo_source_dict) != 0):
                    pseudo_source_pointer = 0
                    pseudo_source_dict = [
                        (i, j) for (i, j) in dset_loaders['pseudo_source']
                    ]

                if pseudo_size == 0:
                    # Empty pseudo set: use all source and target train data.
                    domain_inputs = torch.cat((inputs, target_inputs), 0)
                    domain_labels = torch.FloatTensor(
                        [1.] * inputs.size(0) + [0.] * target_inputs.size(0))
                    fuse_inputs = inputs
                    fuse_labels = labels
                else:
                    pseudo_batch = pseudo_dict[pseudo_pointer]
                    pseudo_inputs = pseudo_batch[0]
                    pseudo_labels = pseudo_batch[1][0]
                    pseudo_weights = pseudo_batch[1][1]
                    # TODO: an earlier version fed source + pseudo + target +
                    # pseudo-source to the critic; this batch is now unused.
                    pseudo_source_inputs = pseudo_source_dict[
                        pseudo_source_pointer][0]

                    domain_inputs = torch.cat((inputs, pseudo_inputs), 0)
                    domain_labels = torch.FloatTensor(
                        [1.] * inputs.size(0) + [0.] * pseudo_inputs.size(0))
                    fuse_inputs = torch.cat((inputs, pseudo_inputs), 0)
                    fuse_labels = torch.cat((labels, pseudo_labels), 0)

                inputs, labels = fuse_inputs.cuda(), fuse_labels.cuda()
                domain_inputs, domain_labels = (domain_inputs.cuda(),
                                                domain_labels.cuda())

                source_weight_tensor = torch.FloatTensor(
                    [1.] * source_batchsize)
                pseudo_weights_tensor = pseudo_weights.float()
                class_weights_tensor = torch.cat(
                    (source_weight_tensor, pseudo_weights_tensor), 0)
                dom_weights_tensor = torch.FloatTensor(
                    [0.] * source_batchsize + [1.] * pseudo_batchsize)
                ini_weight = torch.cat(
                    (class_weights_tensor, dom_weights_tensor),
                    0).squeeze().cuda()

                class_outputs = classifier(extractor(inputs))
                domain_outputs = ad_net(extractor(domain_inputs)).squeeze()
                class_loss = compute_new_loss(class_outputs, labels,
                                              ini_weight)

                target_pointer += 1
                pseudo_pointer += 1
                pseudo_source_pointer += 1

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            domain_criterion = nn.BCEWithLogitsLoss()
            domain_labels = domain_labels.squeeze()
            domain_loss = domain_criterion(domain_outputs, domain_labels)

            total_loss = class_loss + 1.0 * domain_loss
            print('class_loss {}, domain_loss {}'.format(
                class_loss.item(), domain_loss.item()))

            total_loss.backward()
            optimizer.step()
            optimizer_ad.step()

    def train_ican(extractor, classifier, ad_net, disc_activate, config,
                   epoch):
        """Run the five ICAN stages for one epoch."""
        # 1. Source accuracy drives the pseudo-label confidence threshold.
        accuracy_s = test(extractor, classifier, config['source_test_loader'],
                          epoch)
        # 2. Build the pseudo-labelled set.
        pseu_set = select_samples_ican(extractor, classifier, ad_net,
                                       disc_activate, config, epoch,
                                       accuracy_s)
        # 3. Update disc_activate (the discriminator threshold) on it.
        update_ican(extractor, classifier, ad_net, disc_activate, config,
                    pseu_set, epoch)
        # 4. Prepare the source and pseudo batches for the training pass.
        (dset_loaders, target_dict, pseudo_dict, pseudo_source_dict,
         source_batchsize, pseudo_batchsize) = prepare_dataset(epoch, pseu_set)
        # 5. Train.
        do_training(dset_loaders, target_dict, source_batchsize,
                    pseudo_batchsize, pseudo_dict, pseudo_source_dict)

    for epoch in range(1, config['n_epochs'] + 1):
        train_ican(extractor, classifier, ad_net, disc_activate, config, epoch)

        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on source_test_loader')
            test(extractor, classifier, config['source_test_loader'], epoch)
            print('test on target_test_loader')
            test(extractor, classifier, config['target_test_loader'], epoch)

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, title)
            draw_tsne(extractor, classifier, config['source_test_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=True)
def train_cdan_iw(config):
    """Train a CDAN/DANN model with importance-weighted source loss.

    Builds the extractor, classifier and adversarial network from ``config``,
    trains for ``config['n_epochs']`` epochs, evaluates on the target test
    loader every ``TEST_INTERVAL`` epochs (saving the best-scoring weights
    under the run directory) and plots every ``VIS_INTERVAL`` epochs.

    Args:
        config: dict of settings. Keys read here: ``network``, ``n_flattens``,
            ``n_hiddens``, ``n_class``, ``random_layer``, ``res_dir``,
            ``normal``, ``dilation``, ``iw``, ``lr``, ``models``
            (``'CDAN_IW'``, ``'CDAN_EIW'`` or ``'DANN_IW'``), ``n_epochs``,
            ``TEST_INTERVAL``, ``VIS_INTERVAL`` and the
            ``source_train_loader`` / ``target_train_loader`` /
            ``target_test_loader`` data loaders.

    Raises:
        ValueError: if ``config['models']`` is not a recognised method.
    """
    if config['network'] == 'inceptionv1':
        extractor = InceptionV1(num_classes=32)
    elif config['network'] == 'inceptionv1s':
        extractor = InceptionV1s(num_classes=32)
    else:
        extractor = Extractor(n_flattens=config['n_flattens'],
                              n_hiddens=config['n_hiddens'])
    classifier = Classifier(n_flattens=config['n_flattens'],
                            n_hiddens=config['n_hiddens'],
                            n_class=config['n_class'])

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        extractor = extractor.cuda()
        classifier = classifier.cuda()

    cdan_random = config['random_layer']
    res_dir = os.path.join(
        config['res_dir'],
        'normal{}-{}-dilation{}-iw{}-lr{}'.format(config['normal'],
                                                  config['network'],
                                                  config['dilation'],
                                                  config['iw'],
                                                  config['lr']))
    os.makedirs(res_dir, exist_ok=True)

    print('train_cdan_iw')
    print(config)

    set_log_config(res_dir)
    # Log under this trainer's own name so runs can be told apart.
    logging.debug('train_cdan_iw')
    logging.debug(extractor)
    logging.debug(classifier)
    logging.debug(config)

    # The discriminator input width depends on the adaptation method.
    if config['models'] == 'DANN_IW':
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'], config['n_hiddens'])
    elif cdan_random:
        random_layer = RandomLayer([config['n_flattens'], config['n_class']],
                                   config['n_hiddens'])
        ad_net = AdversarialNetwork(config['n_hiddens'], config['n_hiddens'])
        if use_cuda:
            # Called as a statement: the return value is not relied upon.
            random_layer.cuda()
    else:
        random_layer = None
        ad_net = AdversarialNetwork(config['n_flattens'] * config['n_class'],
                                    config['n_hiddens'])
    if use_cuda:
        ad_net = ad_net.cuda()

    optimizer = torch.optim.Adam([
        {'params': extractor.parameters(), 'lr': config['lr']},
        {'params': classifier.parameters(), 'lr': config['lr']},
    ], weight_decay=0.0001)
    optimizer_ad = torch.optim.Adam(ad_net.parameters(),
                                    lr=config['lr'],
                                    weight_decay=0.0001)
    print(ad_net)

    extractor_path = os.path.join(res_dir, "extractor.pth")
    classifier_path = os.path.join(res_dir, "classifier.pth")
    adnet_path = os.path.join(res_dir, "adnet.pth")

    def train(extractor, classifier, ad_net, config, epoch):
        """Run one epoch over the source loader, cycling the target loader."""
        start_epoch = 0
        extractor.train()
        classifier.train()
        ad_net.train()

        iter_source = iter(config['source_train_loader'])
        iter_target = iter(config['target_train_loader'])
        len_target_loader = len(config['target_train_loader'])
        num_iter = len(config['source_train_loader'])

        # The adversarial schedule depends only on the epoch.
        gamma = 2 / (1 + math.exp(-10 * epoch / config['n_epochs'])) - 1
        # The weight-estimation pass queries the discriminator with a fixed
        # coefficient instead of the schedule.
        weight_gamma = 1

        for step in range(1, num_iter + 1):
            data_source, label_source = next(iter_source)
            data_target, _ = next(iter_target)
            if step % len_target_loader == 0:
                # Target loader exhausted: restart it for the next step.
                iter_target = iter(config['target_train_loader'])
            if use_cuda:
                data_source = data_source.cuda()
                label_source = label_source.cuda()
                data_target = data_target.cuda()

            # Per-sample source weights from the current discriminator output.
            with torch.no_grad():
                if config['models'] in ('CDAN_IW', 'CDAN_EIW'):
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    source_preds = classifier(h_s)
                    softmax_output_s = nn.Softmax(dim=1)(source_preds)
                    # Uniform class weights stand in for the softmax output
                    # when forming the class-by-feature outer product.
                    weights = torch.ones(softmax_output_s.shape,
                                         device=softmax_output_s.device)
                    op_out = torch.bmm(weights.unsqueeze(2), h_s.unsqueeze(1))
                    ad_out = ad_net(
                        op_out.view(-1,
                                    softmax_output_s.size(1) * h_s.size(1)),
                        weight_gamma, training=False)
                    dom_weight = 1 + torch.abs(0.5 - ad_out) ** config['iw']
                elif config['models'] == 'DANN_IW':
                    h_s = extractor(data_source)
                    h_s = h_s.view(h_s.size(0), -1)
                    ad_out = ad_net(h_s, weight_gamma, training=False)
                    # DANN_IW currently weights every source sample equally.
                    dom_weight = torch.ones(ad_out.shape,
                                            device=ad_out.device)
                else:
                    raise ValueError('Method cannot be recognized.')
                # Flatten so the weights pair element-wise with the (N,)
                # per-sample loss instead of broadcasting to an N x N matrix.
                dom_weight = dom_weight.view(-1)

            optimizer.zero_grad()
            optimizer_ad.zero_grad()

            h_s = extractor(data_source)
            h_s = h_s.view(h_s.size(0), -1)
            h_t = extractor(data_target)
            h_t = h_t.view(h_t.size(0), -1)

            source_preds = classifier(h_s)
            softmax_output_s = nn.Softmax(dim=1)(source_preds)
            target_preds = classifier(h_t)
            softmax_output_t = nn.Softmax(dim=1)(target_preds)

            feature = torch.cat((h_s, h_t), 0)
            softmax_output = torch.cat((softmax_output_s, softmax_output_t), 0)

            per_sample_loss = nn.CrossEntropyLoss(reduction='none')(
                source_preds, label_source)
            cls_loss = torch.mean(dom_weight * per_sample_loss)

            if epoch > start_epoch:
                if config['models'] == 'CDAN_EIW':
                    entropy = loss_func.Entropy(softmax_output)
                    d_loss = loss_func.CDAN(
                        [feature, softmax_output], ad_net, gamma, entropy,
                        loss_func.calc_coeff(num_iter * (epoch - start_epoch)
                                             + step),
                        random_layer)
                elif config['models'] == 'CDAN_IW':
                    d_loss = loss_func.CDAN([feature, softmax_output], ad_net,
                                            gamma, None, None, random_layer)
                elif config['models'] == 'DANN_IW':
                    d_loss = loss_func.DANN(feature, ad_net, gamma)
                else:
                    raise ValueError('Method cannot be recognized.')
            else:
                d_loss = 0

            loss = cls_loss + d_loss
            loss.backward()
            optimizer.step()
            if epoch > start_epoch:
                optimizer_ad.step()

            if step % 20 == 0:
                # float() accepts both a loss tensor and the integer zero.
                print('Train Epoch {} closs {:.6f}, dloss {:.6f}, Loss {:.6f}'.
                      format(epoch, cls_loss.item(), float(d_loss),
                             loss.item()))

    best_accuracy = 0
    best_model_index = -1
    for epoch in range(1, config['n_epochs'] + 1):
        train(extractor, classifier, ad_net, config, epoch)

        if epoch % config['TEST_INTERVAL'] == 0:
            print('test on target_test_loader')
            accuracy = test(extractor, classifier,
                            config['target_test_loader'], epoch)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_index = epoch
                # Keep only the best-scoring weights on disk.
                torch.save(extractor.state_dict(), extractor_path)
                torch.save(classifier.state_dict(), classifier_path)
                torch.save(ad_net.state_dict(), adnet_path)
            print(
                'epoch {} accuracy: {:.6f}, best accuracy {:.6f} on epoch {}'.
                format(epoch, accuracy, best_accuracy, best_model_index))

        if epoch % config['VIS_INTERVAL'] == 0:
            title = config['models']
            draw_confusion_matrix(extractor, classifier,
                                  config['target_test_loader'], res_dir,
                                  epoch, title)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=True)
            draw_tsne(extractor, classifier, config['source_train_loader'],
                      config['target_test_loader'], res_dir, epoch, title,
                      separate=False)