def main():
    """Train a source/target model pair that shares a feature extractor.

    Uses the module-level globals ``args`` and ``best_prec1`` and the project
    helpers ``opts``, ``resnet``, ``generate_dataloader``, ``train``,
    ``validate`` and ``save_checkpoint``.
    """
    global args, best_prec1
    args = opts()
    model_source, model_target = resnet(args)
    # Wrap both networks for multi-GPU training.
    model_source = torch.nn.DataParallel(model_source).cuda()
    model_target = torch.nn.DataParallel(model_target).cuda()
    # Sanity check: the convolutional trunk object is shared between the two
    # models (same id), while each model owns its own classifier head.
    print('the memory id should be same for the shared feature extractor:')
    print(id(model_source.module.resnet_conv))  # the memory is shared here
    print(id(model_target.module.resnet_conv))
    print('the memory id should be different for the different classifiers:')
    print(id(model_source.module.fc))
    print(id(model_target.module.fc))
    # Define loss function (criterion); fix seeds so data order is repeatable.
    criterion = nn.CrossEntropyLoss().cuda()
    np.random.seed(1)  # fix the random data
    random.seed(1)
    if args.meta_sgd:
        # One learnable per-parameter learning-rate tensor (Meta-SGD style).
        meta_train_lr = []
        for param in model_target.parameters():
            meta_train_lr.append(
                torch.FloatTensor(param.data.size()).fill_(
                    args.meta_train_lr).cuda())
    if args.pretrained:
        print('the pretrained setting of optimizer')
        # FIX: the original duplicated a byte-identical optimizer definition
        # for auxiliary_dataset == 'imagenet' and == 'l_bird'; the two
        # branches are merged into one.
        optimizer = torch.optim.SGD([
            {
                'params': model_source.module.resnet_conv.parameters(),
                'name': 'pre-trained'
            },
            {
                'params': model_source.module.fc.parameters(),
                'name': 'pre-trained'
            },
            {
                'params': model_target.module.fc.parameters(),
                'name': 'new-added'
            },
        ],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        print('the from scratch setting of optimizer')
        optimizer = torch.optim.SGD([
            {
                'params': model_source.module.resnet_conv.parameters(),
                'name': 'new-added'
            },
            {
                'params': model_source.module.fc.parameters(),
                'name': 'new-added'
            },
            {
                'params': model_target.module.fc.parameters(),
                'name': 'new-added'
            },
        ],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    # Optionally resume from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.meta_sgd:
                meta_train_lr = checkpoint['meta_train_lr']
            best_prec1 = checkpoint['best_prec1']
            model_source.load_state_dict(checkpoint['source_state_dict'])
            model_target.load_state_dict(checkpoint['target_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    # Log the full option state at the start of the run.
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'w')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()
    cudnn.benchmark = True
    # Process the data and prepare the dataloaders.  Two loaders returned
    # means target-only; four means source + target.
    dataloader_returned = generate_dataloader(args)
    dataloader_number_returned = len(dataloader_returned)
    print('the number of dataloader number returned is: ',
          dataloader_number_returned)
    if dataloader_number_returned != 2:
        train_loader_source, val_loader_source, train_loader_target, val_loader_target = dataloader_returned
    else:
        train_loader_target, val_loader_target = dataloader_returned
        train_loader_source = None
    # Test only.
    if args.test_only:
        if dataloader_number_returned == 2:
            validate(None, val_loader_target, model_source, model_target,
                     criterion, 0, args)
        else:
            validate(val_loader_source, val_loader_target, model_source,
                     model_target, criterion, 0, args)
        return
    print('begin training')
    if train_loader_source:
        train_loader_source_batch = enumerate(train_loader_source)
    else:
        train_loader_source_batch = None
    train_loader_target_batch = enumerate(train_loader_target)
    for epoch in range(args.start_epoch, args.epochs):
        # Train for one epoch; the batch iterators are threaded through so an
        # exhausted loader can be re-wound inside train().
        if args.meta_sgd:
            train_loader_source_batch, train_loader_target_batch, meta_train_lr = train(
                train_loader_source, train_loader_source_batch,
                train_loader_target, train_loader_target_batch, model_source,
                model_target, criterion, optimizer, epoch, args,
                meta_train_lr)
        else:
            train_loader_source_batch, train_loader_target_batch = train(
                train_loader_source, train_loader_source_batch,
                train_loader_target, train_loader_target_batch, model_source,
                model_target, criterion, optimizer, epoch, args, None)
        # Evaluate on the validation data.
        # NOTE(review): '(epoch + 1) % args.epochs == 0' looks like it was
        # meant to be '(epoch + 1) == args.epochs' (always test at the final
        # epoch); kept as-is to preserve behavior.
        if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % args.epochs == 0:
            if dataloader_number_returned == 2:
                prec1 = validate(None, val_loader_target, model_source,
                                 model_target, criterion, epoch, args)
            else:
                prec1 = validate(val_loader_source, val_loader_target,
                                 model_source, model_target, criterion, epoch,
                                 args)
            # Record the best prec1 and save a checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best:
                log = open(os.path.join(args.log, 'log.txt'), 'a')
                log.write(' \nTarget_T1 acc: %3f' % (best_prec1))
                log.close()
            checkpoint_state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'source_state_dict': model_source.state_dict(),
                'target_state_dict': model_target.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }
            if args.meta_sgd:
                checkpoint_state['meta_train_lr'] = meta_train_lr
            # FIX: the meta-SGD branch originally passed 'epoch' while the
            # plain branch passed 'epoch + 1'; unified to 'epoch + 1' and the
            # two near-identical save_checkpoint calls merged.
            save_checkpoint(checkpoint_state, is_best, args, epoch + 1)
def main():
    """Clustering-based domain-adaptation training loop.

    Alternates K-means-style clustering of target features (to produce
    pseudo labels and cluster centers) with mini-batch training, tracking
    validation/test accuracy in module-level globals.
    """
    global args, best_prec1, best_test_prec1, cond_best_test_prec1, best_cluster_acc, best_cluster_acc_2
    # define model
    model = Model_Construct(args)
    print(model)
    model = torch.nn.DataParallel(model).cuda()  # define multiple GPUs
    # define learnable cluster centers for the two feature heads
    learn_cen = Variable(torch.cuda.FloatTensor(args.num_classes, 2048).fill_(0))
    learn_cen.requires_grad_(True)
    learn_cen_2 = Variable(torch.cuda.FloatTensor(args.num_classes, args.num_neurons * 4).fill_(0))
    learn_cen_2.requires_grad_(True)
    # define loss function/criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()
    criterion_cons = ConsensusLoss(nClass=args.num_classes, div=args.div).cuda()
    np.random.seed(1)  # may fix test data
    random.seed(1)
    torch.manual_seed(1)
    # apply different learning rates to different layers; the learnable
    # centers are optimized jointly with the backbone
    optimizer = torch.optim.SGD([
        {'params': model.module.conv1.parameters(), 'name': 'conv'},
        {'params': model.module.bn1.parameters(), 'name': 'conv'},
        {'params': model.module.layer1.parameters(), 'name': 'conv'},
        {'params': model.module.layer2.parameters(), 'name': 'conv'},
        {'params': model.module.layer3.parameters(), 'name': 'conv'},
        {'params': model.module.layer4.parameters(), 'name': 'conv'},
        {'params': model.module.fc1.parameters(), 'name': 'ca_cl'},
        {'params': model.module.fc2.parameters(), 'name': 'ca_cl'},
        {'params': learn_cen, 'name': 'conv'},
        {'params': learn_cen_2, 'name': 'conv'}
    ],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)
    # resume
    epoch = 0
    # keep the initial weights so the first clustering pass can optionally be
    # done with the source pre-trained model (see src_pretr_first below)
    init_state_dict = model.state_dict()
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            best_test_prec1 = checkpoint['best_test_prec1']
            cond_best_test_prec1 = checkpoint['cond_best_test_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            learn_cen = checkpoint['learn_cen']
            learn_cen_2 = checkpoint['learn_cen_2']
            print("==> loaded checkpoint '{}'(epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from does not exist!', args.resume)
    # make log directory and record the run configuration
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()
    # start time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------')
    log.close()
    cudnn.benchmark = True
    # process data and prepare dataloaders
    train_loader_source, train_loader_target, val_loader_target, val_loader_target_t, val_loader_source = generate_dataloader(args)
    # avoid using ground truth labels of target
    train_loader_target.dataset.tgts = list(np.array(torch.LongTensor(train_loader_target.dataset.tgts).fill_(-1)))
    print('begin training')
    batch_number = count_epoch_on_large_dataset(train_loader_target, train_loader_source, args)
    num_itern_total = args.epochs * batch_number
    new_epoch_flag = False  # if new epoch, new_epoch_flag=True
    test_flag = False  # if test, test_flag=True
    src_cs = torch.cuda.FloatTensor(len(train_loader_source.dataset.tgts)).fill_(1)  # initialize source weights
    count_itern_each_epoch = 0
    for itern in range(epoch * batch_number, num_itern_total):
        # evaluate on the target training and test data at each epoch boundary
        if (itern == 0) or (count_itern_each_epoch == batch_number):
            prec1, c_s, c_s_2, c_t, c_t_2, c_srctar, c_srctar_2, source_features, source_features_2, source_targets, target_features, target_features_2, target_targets, pseudo_labels = validate_compute_cen(val_loader_target, val_loader_source, model, criterion, epoch, args)
            test_acc = validate(val_loader_target_t, model, criterion, epoch, args)
            test_flag = True
            # K-means clustering or its variants: pick the initial centers
            if ((itern == 0) and args.src_cen_first) or (args.initial_cluster == 2):
                cen = c_s
                cen_2 = c_s_2
            else:
                cen = c_t
                cen_2 = c_t_2
            if (itern != 0) and (args.initial_cluster != 0) and (args.cluster_method == 'kernel_kmeans'):
                cluster_acc, c_t = kernel_k_means(target_features, target_targets, pseudo_labels, train_loader_target, epoch, model, args, best_cluster_acc)
                cluster_acc_2, c_t_2 = kernel_k_means(target_features_2, target_targets, pseudo_labels, train_loader_target, epoch, model, args, best_cluster_acc_2, change_target=False)
            elif args.cluster_method != 'spherical_kmeans':
                cluster_acc, c_t = k_means(target_features, target_targets, train_loader_target, epoch, model, cen, args, best_cluster_acc)
                cluster_acc_2, c_t_2 = k_means(target_features_2, target_targets, train_loader_target, epoch, model, cen_2, args, best_cluster_acc_2, change_target=False)
            elif args.cluster_method == 'spherical_kmeans':
                cluster_acc, c_t = spherical_k_means(target_features, target_targets, train_loader_target, epoch, model, cen, args, best_cluster_acc)
                cluster_acc_2, c_t_2 = spherical_k_means(target_features_2, target_targets, train_loader_target, epoch, model, cen_2, args, best_cluster_acc_2, change_target=False)
            # record the best accuracy of K-means clustering
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            if cluster_acc != best_cluster_acc:
                best_cluster_acc = cluster_acc
                log.write('\n best_cluster acc: %3f' % best_cluster_acc)
            if cluster_acc_2 != best_cluster_acc_2:
                best_cluster_acc_2 = cluster_acc_2
                log.write('\n best_cluster_2 acc: %3f' % best_cluster_acc_2)
            log.close()
            # re-initialize learnable cluster centers
            if args.init_cen_on_st:
                cen = (c_t + c_s) / 2  # or c_srctar
                cen_2 = (c_t_2 + c_s_2) / 2  # or c_srctar_2
            else:
                cen = c_t
                cen_2 = c_t_2
            learn_cen.data = cen.data.clone()
            learn_cen_2.data = cen_2.data.clone()
            # select source samples (re-weighting by similarity to target)
            if (itern != 0) and (args.src_soft_select or args.src_hard_select):
                src_cs = source_select(source_features, source_targets, target_features, pseudo_labels, train_loader_source, epoch, c_t.data.clone(), args)
            # use source pre-trained model to extract features for first clustering
            if (itern == 0) and args.src_pretr_first:
                model.load_state_dict(init_state_dict)
            if itern != 0:
                count_itern_each_epoch = 0
                epoch += 1
            batch_number = count_epoch_on_large_dataset(train_loader_target, train_loader_source, args)
            train_loader_target_batch = enumerate(train_loader_target)
            train_loader_source_batch = enumerate(train_loader_source)
            new_epoch_flag = True
            # release the large feature tensors before the next epoch
            del source_features
            del source_features_2
            del source_targets
            del target_features
            del target_features_2
            del target_targets
            del pseudo_labels
            gc.collect()
            # FIX: the original called torch.cuda.empty_cache() twice in a
            # row; one call suffices.
            torch.cuda.empty_cache()
        elif (args.src.find('visda') != -1) and (itern % int(num_itern_total / 200) == 0):
            # NOTE(review): int(num_itern_total / 200) can be 0 for very small
            # runs, which would make '%' raise ZeroDivisionError — confirm
            # visda runs always have >= 200 iterations.
            prec1, _, _, _, _, _, _, _, _, _, _, _, _, _ = validate_compute_cen(val_loader_target, val_loader_source, model, criterion, epoch, args, compute_cen=False)
            test_acc = validate(val_loader_target_t, model, criterion, epoch, args)
            test_flag = True
        if test_flag:
            # record the best prec1 and save checkpoint
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            if prec1 > best_prec1:
                best_prec1 = prec1
                cond_best_test_prec1 = 0
                log.write('\n                                                                        best val acc till now: %3f' % best_prec1)
            if test_acc > best_test_prec1:
                best_test_prec1 = test_acc
                log.write('\n best test acc till now: %3f' % best_test_prec1)
            # FIX: removed a stray ipdb.set_trace() debugger breakpoint that
            # halted every training run at this point.
            is_cond_best = ((prec1 == best_prec1) and (test_acc > cond_best_test_prec1))
            if is_cond_best:
                cond_best_test_prec1 = test_acc
                log.write('\n cond best test acc till now: %3f' % cond_best_test_prec1)
            log.close()
            save_checkpoint({
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'learn_cen': learn_cen,
                'learn_cen_2': learn_cen_2,
                'best_prec1': best_prec1,
                'best_test_prec1': best_test_prec1,
                'cond_best_test_prec1': cond_best_test_prec1,
            }, is_cond_best, args)
            test_flag = False
        # early stop
        if epoch > args.stop_epoch:
            break
        # train for one iteration
        train_loader_source_batch, train_loader_target_batch = train(train_loader_source, train_loader_source_batch, train_loader_target, train_loader_target_batch, model, learn_cen, learn_cen_2, criterion_cons, optimizer, itern, epoch, new_epoch_flag, src_cs, args)
        model = model.cuda()
        new_epoch_flag = False
        count_itern_each_epoch += 1
    # final summary
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n*** best val acc: %3f ***' % best_prec1)
    log.write('\n*** best test acc: %3f ***' % best_test_prec1)
    log.write('\n*** cond best test acc: %3f ***' % cond_best_test_prec1)
    # end time
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------\n')
    log.close()
def main():
    """Fine-tune a ResNet; the new fc layer trains at 10x the base LR.

    Uses the module-level globals ``args``/``best_prec1`` and the project
    helpers ``opts``, ``resnet``, ``generate_dataloader``, ``train``,
    ``validate`` and ``save_checkpoint``.
    """
    global args, best_prec1
    args = opts()
    model = resnet(args.arch, args.pretrain, args)
    # define-multi GPU
    model = torch.nn.DataParallel(model).cuda()
    # define loss function(criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # Apply different learning rates to different layers: the pre-trained
    # trunk uses args.lr, the newly added fc uses args.lr * 10.
    optimizer = torch.optim.SGD([{
        'params': model.module.conv1.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.bn1.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer1.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer2.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer3.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer4.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.fc.parameters(),
        'lr': args.lr * 10,
        'name': 'new-added'
    }],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    else:
        if not os.path.isdir(args.log):
            os.makedirs(args.log)
        log = open(os.path.join(args.log, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()
    # FIX: the resume path skipped log-directory creation, so the appends
    # below crashed when args.log did not exist; ensure it exists either way
    # (the 'w'-mode state dump above is still only written on fresh runs).
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    train_loader, val_loader = generate_dataloader(args)
    # test only
    if args.test_only:
        validate(val_loader, model, criterion)
        return
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on the val data
        prec1 = validate(val_loader, model, criterion, epoch, args)
        # record the best prec1 and save checkpoint
        is_best = prec1 > best_prec1
        if is_best:
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            log.write(" best result is %3f" % (prec1))
            log.close()
        best_prec1 = max(prec1, best_prec1)
        # NOTE(review): this variant passes (state, epoch, is_best, args) —
        # a different argument order from the other scripts' save_checkpoint;
        # confirm it matches this file's save_checkpoint signature.
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, epoch, is_best, args)
def main():
    """Build the backbone, log the run configuration, then generate pseudo
    labels for the target domain via clustering or label propagation.
    """
    global args, best_prec1
    args = opts()

    # Only ResNet backbones are supported here.
    if 'resnet' in args.arch:
        model = resnet(args)
    else:
        raise ValueError('Unavailable model architecture!!!')
    model = torch.nn.DataParallel(model).cuda()  # multi-GPU
    print(model)

    # Persist the full option state, then a timestamped separator.
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    with open(os.path.join(args.log, 'log.txt'), 'a') as log:
        log.write(json.dumps({k: v for k, v in args._get_kwargs()}) + '\n')
    with open(os.path.join(args.log, 'log.txt'), 'a') as log:
        log.write('\n-------------------------------------------\n')
        log.write(time.asctime(time.localtime(time.time())))
        log.write('\n-------------------------------------------')

    cudnn.benchmark = True

    # Process the data and prepare the dataloaders for both domains.
    (source_train_loader_ce, source_train_dataset, target_train_loader_ce,
     target_train_dataset, source_val_loader,
     target_val_loader) = generate_dataloader(args)

    # Pseudo-label generation: clustering (the AO of CAN) or label propagation.
    if args.pseudo_type == 'cluster':
        pseudo_labels_for_path = download_feature_and_pca_clustering(
            0, source_val_loader, target_val_loader, model, args)
    elif args.pseudo_type == 'lp':
        pseudo_labels_for_path = download_feature_and_pca_label_prob(
            0, source_val_loader, target_val_loader, model, args)
    else:
        raise NotImplementedError
def main():
    """DANN-style adversarial domain-adaptation training.

    Builds a feature extractor plus class/domain classifiers, trains until
    ``args.epochs`` epochs have passed, and appends a run summary to
    ``args.result``/overview.txt.
    """
    start_time = datetime.now()
    start_time_str = datetime.strptime(drop_msecond(start_time),
                                       "%Y-%m-%d %H:%M:%S")
    args = opts()
    from trainer import train, validate

    # init models, multi GPU; 512 features for ResNet18/32, 2048 for ResNet50
    feature_extractor = nn.DataParallel(Extractor(args))
    class_classifier = nn.DataParallel(
        Class_classifier(2048, num_classes=args.num_classes))
    domain_classifier = nn.DataParallel(
        Domain_classifier(2048, hidden_size=128))
    if torch.cuda.is_available():
        feature_extractor = feature_extractor.cuda()
        class_classifier = class_classifier.cuda()
        domain_classifier = domain_classifier.cuda()

    # one optimizer over all three modules (multi-GPU parameters)
    optimizer = torch.optim.SGD(
        [{
            'params': feature_extractor.module.parameters(),
            'name': 'pre-trained'
        }, {
            'params': class_classifier.module.parameters(),
            'name': 'new-added'
        }, {
            'params': domain_classifier.module.parameters(),
            'name': 'new-added'
        }],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=True)

    best_prec1 = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # FIX: the original called model.load_state_dict(...) on a
            # 'model' variable that no longer exists (it was split into the
            # three modules above), which raised NameError on every resume.
            # Load the three state dicts that save_checkpoint (below)
            # actually writes.
            feature_extractor.load_state_dict(
                checkpoint['feature_extractor_state_dict'])
            class_classifier.load_state_dict(
                checkpoint['class_classifier_state_dict'])
            domain_classifier.load_state_dict(
                checkpoint['domain_classifier_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            raise ValueError('The file to be resumed is not exited',
                             args.resume)

    train_loader_source, train_loader_target, val_loader_target = generate_dataloader(
        args)
    print('Begin training')
    print(len(train_loader_source), len(train_loader_target))
    train_loader_source_batches = enumerate(train_loader_source)
    train_loader_target_batches = enumerate(train_loader_target)
    if torch.cuda.is_available():
        criterion_y = nn.CrossEntropyLoss().cuda()
        criterion_d = nn.CrossEntropyLoss().cuda()  # not used in this code
    else:
        criterion_y = nn.CrossEntropyLoss()
        criterion_d = nn.CrossEntropyLoss()
    writer = SummaryWriter(log_dir=args.log)

    epoch = args.start_epoch
    epochs_has_not_been_improved = 0
    maximum_gap = 0
    while epoch < args.epochs:
        # train for one epoch; train() advances the batch iterators and
        # reports when a full epoch has been consumed (new_epoch_flag)
        train_loader_source_batches, train_loader_target_batches, epoch, pred1_acc_train, loss_C, loss_G, new_epoch_flag = train(
            train_loader_source, train_loader_source_batches,
            train_loader_target, train_loader_target_batches,
            feature_extractor, class_classifier, domain_classifier,
            criterion_y, criterion_d, optimizer, epoch, args)
        if new_epoch_flag:
            # evaluate on the val data every args.test_freq epochs
            if epoch % args.test_freq == (args.test_freq - 1):
                prec1, _ = validate(None, val_loader_target,
                                    feature_extractor, class_classifier,
                                    domain_classifier, criterion_y,
                                    criterion_d, epoch, args)
                is_best = prec1 > best_prec1
                if is_best:
                    epochs_has_not_been_improved = 0
                    best_prec1 = prec1
                    with open(os.path.join(args.log, 'log.txt'), 'a') as fp:
                        fp.write(' \nTarget_T1 acc: %3f' % (best_prec1))
                else:
                    epochs_has_not_been_improved += 1
                writer.add_scalars('data/scalar_group', {
                    'pred1_acc_valid': prec1,
                    'best_prec1': best_prec1
                }, epoch)
                # updating the maximum distance between current and best
                current_gap = best_prec1 - prec1
                if current_gap > maximum_gap:
                    maximum_gap = current_gap
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'feature_extractor_state_dict':
                        feature_extractor.state_dict(),
                        'class_classifier_state_dict':
                        class_classifier.state_dict(),
                        'domain_classifier_state_dict':
                        domain_classifier.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict()
                    }, is_best, args, epoch + 1)
    writer.close()

    # run summary
    end_time = datetime.now()
    end_time_str = datetime.strptime(drop_msecond(end_time),
                                     "%Y-%m-%d %H:%M:%S")
    through_time = end_time - start_time
    through_time_str = time_delta2str(through_time)
    with open(os.path.join(args.result, 'overview.txt'), 'a') as fp:
        fp.write(
            '%s: \nbest_prec1:%.2f%%, epochs_has_not_been_improved:%d, maximum distance between current and best:%.2f%%\n\
start at %s, finish at %s, it takes %s \n' %
            (args.log.split('/')[1], best_prec1, epochs_has_not_been_improved,
             maximum_gap, start_time_str, end_time_str, through_time_str))
def main():
    # Fine-tune a ResNet whose weights are initialized from the target branch
    # of a transfer-learning checkpoint.  Uses module-level globals
    # ``args``/``best_prec1`` and the project helpers opts/resnet/
    # generate_dataloader/train/validate/save_checkpoint.
    global args, best_prec1
    args = opts()
    model = resnet(args)
    # define-multi GPU
    model = torch.nn.DataParallel(model).cuda()
    print(model)
    # define loss function(criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # train with stanford dogs from scratch
    if args.new_fc:
        # Same LR everywhere, but the fc group is tagged 'new-added' —
        # presumably so a LR scheduler elsewhere treats it differently; verify.
        optimizer = torch.optim.SGD(
            [
                {
                    'params': model.module.conv1.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.bn1.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer1.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer2.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer3.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer4.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.fc.parameters(),
                    'lr': args.lr,
                    'name': 'new-added'
                }
            ],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    # optionally resume from a checkpoint (weights only; epoch/best_prec1 and
    # the optimizer state are deliberately NOT restored)
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model_state_dict = checkpoint['target_state_dict']
            # iterate over a deep copy because the dict is mutated below
            model_state_dict_tmp = copy.deepcopy(model_state_dict)
            if args.new_fc:
                model_state_dict_init = model.state_dict()
            for k_tmp in model_state_dict_tmp.keys():
                # strip the '.resnet_conv' wrapper prefix used by the
                # checkpointed model so keys match this plain ResNet
                if k_tmp.find('.resnet_conv') != -1:
                    k = k_tmp.replace('.resnet_conv', '')
                    model_state_dict[k] = model_state_dict.pop(k_tmp)
                if args.new_fc:
                    # re-initialize fc layer from the freshly built model
                    # NOTE(review): assumes the checkpoint's fc keys match
                    # this model's fc keys exactly — confirm.
                    if k_tmp.find('.fc') != -1:
                        model_state_dict[k_tmp] = model_state_dict_init[k_tmp]
            model.load_state_dict(model_state_dict)
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    # log the run configuration (note: 'w' truncates any previous log)
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'w')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()
    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    train_loader, val_loader = generate_dataloader(args)
    # test only
    if args.test_only:
        validate(val_loader, model, criterion, -1, args)
        return
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on the val data
        prec1 = validate(val_loader, model, criterion, epoch, args)
        # record the best prec1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            log.write(' \nTop1 acc: %3f' % (best_prec1))
            log.close()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args)
from torch.autograd import Variable
import sys
sys.path.append('..')  # make the parent package (opts/utils/data/model) importable
import numpy as np
from skimage import feature
import time
from opts import opts
from utils import compute_par, check_params, check_model
from data.prepare_data import generate_dataloader, my_dataset
from model import Model

# Parse command-line options and fix the seed so the shuffled data order is
# reproducible across runs.
# NOTE(review): `torch` and `nn` are used below but are not imported in this
# chunk — presumably imported earlier in the file; verify.
args = opts()
torch.manual_seed(777)
train_loader_source, train_loader_target = generate_dataloader(args)
# Model(args) returns a (source, target) pair of networks.
model_source, model_target = Model(args)
if torch.cuda.is_available():
    criterion = nn.CrossEntropyLoss().cuda()
else:
    criterion = nn.CrossEntropyLoss()
# One batch of all-zero labels — presumably the "source domain" class for a
# domain discriminator; confirm against the trainer.
fake_labels = torch.LongTensor(args.batch_size).fill_(0)
def main():
    """AFEM domain-adaptation training.

    Trains per-iteration with class-mean tracking, periodically evaluates on
    the target validation set, and steps the LR scheduler once per epoch.
    Uses the module-level globals ``args``/``best_prec1``.
    """
    global args, best_prec1
    args = opts()
    current_epoch = 0
    # define base model on multi-GPU
    model = resnet(args)
    model = torch.nn.DataParallel(model).cuda()
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_afem = AdaptiveFilteringEMLossForTarget(eps=args.eps).cuda()
    # seeds for reproducibility
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.device_count() > 1:
        torch.cuda.manual_seed_all(args.seed)
    # apply different learning rates to different layers: the pre-trained
    # feature extractor gets a 10x smaller LR
    lr_fe = args.lr * 0.1 if args.pretrained else args.lr
    if args.arch.find('resnet') != -1:
        params_list = [
            {'params': model.module.conv1.parameters(), 'lr': lr_fe},
            {'params': model.module.bn1.parameters(), 'lr': lr_fe},
            {'params': model.module.layer1.parameters(), 'lr': lr_fe},
            {'params': model.module.layer2.parameters(), 'lr': lr_fe},
            {'params': model.module.layer3.parameters(), 'lr': lr_fe},
            {'params': model.module.layer4.parameters(), 'lr': lr_fe},
            {'params': model.module.fc1.parameters()},
            {'params': model.module.fc2.parameters()},
        ]
        # NOTE(review): only args.optimizer == 'sgd' defines optimizer and
        # scheduler; any other value leaves them undefined (NameError later).
        # Confirm upstream option validation guarantees 'sgd'.
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(params_list,
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay,
                                        nesterov=args.nesterov)
            if args.lr_scheduler == 'dann':
                lr_lambda = lambda epoch: 1 / pow(
                    (1 + 10 * epoch / args.epochs), 0.75)
                scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                              lr_lambda,
                                                              last_epoch=-1)
            elif args.lr_scheduler == 'cosine':
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, T_max=args.epochs, eta_min=0, last_epoch=-1)
            elif args.lr_scheduler == 'step':
                # decay by gamma once past decay_epoch[0], twice past [1]
                lr_lambda = lambda epoch: args.gamma**(
                    epoch + 1 > args.decay_epoch[
                        1] and 2 or epoch + 1 > args.decay_epoch[0] and 1 or 0)
                scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                              lr_lambda,
                                                              last_epoch=-1)
    else:
        raise ValueError('Unavailable model architecture!!!')
    if args.resume:
        print("==> loading checkpoints '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        current_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        print("==> loaded checkpoint '{}'(epoch {})".format(
            args.resume, checkpoint['epoch']))
    # log the run configuration
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()
    # start time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------')
    log.close()
    cudnn.benchmark = True
    # process data and prepare dataloaders
    train_loader_source, train_loader_target, val_loader_target, val_loader_source = generate_dataloader(
        args)
    if args.eval_only:
        prec1 = evaluate(val_loader_target, model, criterion, -1, args)
        print(' * Eval acc@1: {:.3f}'.format(prec1))
        return
    print('begin training')
    train_loader_source_batch = enumerate(train_loader_source)
    train_loader_target_batch = enumerate(train_loader_target)
    batch_number = count_epoch_on_large_dataset(train_loader_target,
                                                train_loader_source)
    num_itern_total = args.epochs * batch_number
    # FIX: int(num_itern_total / 200) is 0 whenever the run has fewer than
    # 200 iterations, which made '(itern + 1) % test_freq' below raise
    # ZeroDivisionError; clamp to at least 1.
    test_freq = max(1, int(num_itern_total / 200))
    print('test_freq: ', test_freq)
    args.start_epoch = current_epoch
    # running class means for both feature heads (source and target)
    cs_1 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat1_dim).fill_(0))
    ct_1 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat1_dim).fill_(0))
    cs_2 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat2_dim).fill_(0))
    ct_2 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat2_dim).fill_(0))
    for itern in range(args.start_epoch * batch_number, num_itern_total):
        # train for one iteration
        train_loader_source_batch, train_loader_target_batch, cs_1, ct_1, cs_2, ct_2 = train_compute_class_mean(
            train_loader_source, train_loader_source_batch,
            train_loader_target, train_loader_target_batch, model, criterion,
            criterion_afem, optimizer, itern, current_epoch, cs_1, ct_1, cs_2,
            ct_2, args)
        # evaluate on target
        if (itern + 1) % batch_number == 0 or (itern + 1) % test_freq == 0:
            prec1 = evaluate(val_loader_target, model, criterion,
                             current_epoch, args)
            # record the best prec1
            is_best = prec1 > best_prec1
            if is_best:
                best_prec1 = prec1
                log = open(os.path.join(args.log, 'log.txt'), 'a')
                log.write('\n best acc: %3f' % (best_prec1))
                log.close()
        # update learning rate at the end of every epoch
        if (itern + 1) % batch_number == 0:
            scheduler.step()
            current_epoch += 1
            # save checkpoint
            save_checkpoint(
                {
                    'epoch': current_epoch,
                    'arch': args.arch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, args)
            # early stop
            if current_epoch > args.stop_epoch:
                break
    # end time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n * best acc: %3f' % best_prec1)
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------\n')
    log.close()
def Process5_Final_Result(args):
    """Final pipeline stage: reload every trained classifier, dump its scores,
    then ensemble them by probability averaging and report top-1 accuracy.

    Reads the 'model_best.pth.tar' checkpoints written by the earlier stages
    and the '<process_name>.pth.tar' score tables written by download_scores.
    """
    ############################# Image Level Classifier #############################
    log_now = args.dataset + '/Image_Classifier'
    process_name = 'image_classifier'
    model = Model_Construct(args, process_name)
    model = torch.nn.DataParallel(model).cuda()
    pre_trained_model = log_now + '/model_best.pth.tar'
    checkpoint = torch.load(pre_trained_model)
    model.load_state_dict(checkpoint['state_dict'])
    train_loader, val_loader = generate_dataloader(args, process_name, -1)
    # writes the per-sample scores of the val set to disk for the ensemble below
    download_scores(val_loader, model, log_now, process_name, args)
    ############################# PartNet ############################################
    log_now = args.dataset + '/PartNet'
    process_name = 'partnet'
    model = Model_Construct(args, process_name)
    model = torch.nn.DataParallel(model).cuda()
    pre_trained_model = log_now + '/model_best.pth.tar'
    checkpoint = torch.load(pre_trained_model)
    model.load_state_dict(checkpoint['state_dict'])
    train_loader, val_loader = generate_dataloader(args, process_name)
    download_scores(val_loader, model, log_now, process_name, args)
    ############################# Three Part Level Classifiers #######################
    for i in range(
            args.num_part
    ):  ### if the process is break in this section, more modification is needed.
        log_now = args.dataset + '/Part_Classifiers_' + str(i)
        process_name = 'part_classifiers'
        model = Model_Construct(args, process_name)
        model = torch.nn.DataParallel(model).cuda()
        pre_trained_model = log_now + '/model_best.pth.tar'
        checkpoint = torch.load(pre_trained_model)
        model.load_state_dict(checkpoint['state_dict'])
        train_loader, val_loader = generate_dataloader(args, process_name, i)
        download_scores(val_loader, model, log_now, process_name, args)
    # -------- ensemble: load the score tables written above ---------------------
    # NOTE(review): the part-classifier paths below hard-code parts 0..2, which
    # assumes args.num_part == 3 — confirm against the option defaults.
    log_image = args.dataset + '/Image_Classifier'
    process_image = 'image_classifier'
    log_partnet = args.dataset + '/PartNet'
    process_partnet = 'partnet'
    log_part0 = args.dataset + '/Part_Classifiers_' + str(0)
    process_part0 = 'part_classifiers'
    log_part1 = args.dataset + '/Part_Classifiers_' + str(1)
    process_part1 = 'part_classifiers'
    log_part2 = args.dataset + '/Part_Classifiers_' + str(2)
    process_part2 = 'part_classifiers'
    image_table = torch.load(log_image + '/' + process_image + '.pth.tar')
    image_probability = image_table['scores']
    labels = image_table['labels']
    partnet_table = torch.load(log_partnet + '/' + process_partnet + '.pth.tar')
    partnet_probability = partnet_table['scores']  #######################
    part0_table = torch.load(log_part0 + '/' + process_part0 + '.pth.tar')
    part0_probability = part0_table['scores']  ##########################
    part1_table = torch.load(log_part1 + '/' + process_part1 + '.pth.tar')
    part1_probability = part1_table['scores']  ##########################
    part2_table = torch.load(log_part2 + '/' + process_part2 + '.pth.tar')
    part2_probability = part2_table['scores']  ##########################
    probabilities_group = []
    probabilities_group.append(image_probability)
    probabilities_group.append(part0_probability)
    probabilities_group.append(part1_probability)
    probabilities_group.append(part2_probability)
    probabilities_group.append(partnet_probability)
    count = 0
    for i in range(len(labels)):
        # start from group[0], add every group (group[0] twice), then subtract
        # one group[0] — net effect: plain sum of all five score vectors
        probability = probabilities_group[0][i]
        for j in range(len(probabilities_group)):
            probability = probabilities_group[j][i] + probability
        probability = probability - probabilities_group[0][i]
        label = labels[i]
        # top-1 check: highest summed score must match the ground-truth label
        value, index = probability.sort(0, descending=True)
        if index[0] == label:
            count = count + 1
    # NOTE(review): relies on Python 3 true division; under Python 2 this would
    # floor to 0 — confirm the interpreter version this project targets.
    top1 = count / len(labels)
    print('the final results obtained by averaging part0-1-2 image partnet is',
          top1)
def Process2_PartNet(args):
    """Train (or resume) the PartNet model and record the best val accuracy.

    The stage is idempotent: a 'final.txt' marker written on completion makes
    subsequent runs skip it entirely.
    """
    log_now = args.dataset + '/PartNet'
    process_name = 'partnet'
    # a completed previous run leaves a 'final.txt' marker — nothing to do
    if os.path.isfile(log_now + '/final.txt'):
        print('the Process2_PartNet is finished')
        return
    best_prec1 = 0
    model = Model_Construct(args, process_name)
    model = torch.nn.DataParallel(model).cuda()
    criterion = nn.BCELoss().cuda()
    # separate parameter groups so pre-trained and newly added layers can be
    # scheduled differently (the scheduler keys on each group's 'name')
    optimizer = torch.optim.SGD(
        [{
            'params': model.module.conv_model.parameters(),
            'name': 'pre-trained'
        }, {
            'params': model.module.classification_stream.parameters(),
            'name': 'new-added'
        }, {
            'params': model.module.detection_stream.parameters(),
            'name': 'new-added'
        }],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    start_epoch = args.start_epoch
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
            # clear the flag so later pipeline stages do not re-resume from it
            args.resume = ''
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    else:
        if not os.path.isdir(log_now):
            os.makedirs(log_now)
        # fresh run: record the full option set at the top of the log
        log = open(os.path.join(log_now, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()
    cudnn.benchmark = True
    train_loader, val_loader = generate_dataloader(args, process_name, -1)
    if args.test_only:
        # BUGFIX: the original passed only 5 arguments to validate() while the
        # training loop below uses 7, and then fell through into training;
        # use the same signature and stop after evaluating.
        validate(val_loader, model, criterion, 2000, log_now, process_name,
                 args)
        return
    for epoch in range(start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_now,
              process_name, args)
        # evaluate on the val data
        prec1 = validate(val_loader, model, criterion, epoch, log_now,
                         process_name, args)
        # record the best prec1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            log = open(os.path.join(log_now, 'log.txt'), 'a')
            log.write("best acc %3f" % (best_prec1))
            log.close()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, log_now)
        # singular-value-bounding regularization; skipped on the last epoch
        svb_timer = time.time()
        if args.svb and epoch != (args.epochs - 1):
            svb(model, args)
            print(
                '!!!!!!!!!!!!!!!!!! the svb constrain is only applied on the classification stream.'
            )
            svb_det(model, args)
            print('the svb time is: ', time.time() - svb_timer)
    # mark the stage complete so re-runs skip it
    log = open(os.path.join(log_now, 'final.txt'), 'w')
    log.write("best acc %3f" % (best_prec1))
    log.close()
def Process4_Part_Classifiers(args):
    """Fine-tune one classifier per detected part (args.num_part of them).

    Each part gets its own log directory; a 'final.txt' marker makes a
    completed part be skipped on re-runs.
    """
    for i in range(
            args.num_part
    ):  ### if the process is break in this section, more modification is needed.
        log_now = args.dataset + '/Part_Classifiers_' + str(i)
        process_name = 'part_classifiers'
        if os.path.isfile(log_now + '/final.txt'):
            print('the Process4_Part_Classifier is finished', i)
            continue
        best_prec1 = 0
        model = Model_Construct(args, process_name)
        model = torch.nn.DataParallel(model).cuda()
        criterion = nn.CrossEntropyLoss().cuda()
        # the new fc layer gets its own group so it can be scheduled separately
        optimizer = torch.optim.SGD(
            [{
                'params': model.module.base_conv.parameters(),
                'name': 'pre-trained'
            }, {
                'params': model.module.fc.parameters(),
                'lr': args.lr,
                'name': 'new-added'
            }],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
        # warm-start every part classifier from the image-level classifier
        log_image_model = args.dataset + '/Image_Classifier/model_best.pth.tar'
        checkpoint = torch.load(log_image_model)
        model.load_state_dict(checkpoint['state_dict'])
        print('load the cub fine-tuned model from:', log_image_model)
        start_epoch = args.start_epoch
        if args.resume:
            if os.path.isfile(args.resume):
                print("==> loading checkpoints '{}'".format(args.resume))
                checkpoint = torch.load(args.resume)
                start_epoch = checkpoint['epoch']
                best_prec1 = checkpoint['best_prec1']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("==> loaded checkpoint '{}'(epoch {})".format(
                    args.resume, checkpoint['epoch']))
                # clear the flag so the remaining parts start fresh
                args.resume = ''
            else:
                raise ValueError('The file to be resumed from is not exited',
                                 args.resume)
        else:
            if not os.path.isdir(log_now):
                os.makedirs(log_now)
            log = open(os.path.join(log_now, 'log.txt'), 'w')
            state = {k: v for k, v in args._get_kwargs()}
            log.write(json.dumps(state) + '\n')
            log.close()
        cudnn.benchmark = True
        train_loader, val_loader = generate_dataloader(args, process_name, i)
        if args.test_only:
            # BUGFIX: the original passed only 5 arguments to validate() while
            # the training loop below uses 7, and then fell through into
            # training; use the same signature and move on to the next part.
            validate(val_loader, model, criterion, 2000, log_now, process_name,
                     args)
            continue
        for epoch in range(start_epoch, args.epochs_part):
            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch, log_now,
                  process_name, args)
            # evaluate on the val data
            prec1 = validate(val_loader, model, criterion, epoch, log_now,
                             process_name, args)
            # record the best prec1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best:
                log = open(os.path.join(log_now, 'log.txt'), 'a')
                log.write("best acc %3f" % (best_prec1))
                log.close()
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, log_now)
        # mark this part complete so re-runs skip it
        log = open(os.path.join(log_now, 'final.txt'), 'w')
        log.write("best acc %3f" % (best_prec1))
        log.close()
def Process3_Download_Proposals(args):
    """Run the trained PartNet once over train and val sets and dump the part
    proposals to disk for the later part-level classifiers.

    Skipped entirely when a 'final.txt' marker from a completed run exists.
    """
    log_now = args.dataset + '/Download_Proposals'
    process_name = 'download_proposals'
    if os.path.isfile(log_now + '/final.txt'):
        print('the Process3_download proposals is finished')
        return
    model = Model_Construct(args, process_name)
    model = torch.nn.DataParallel(model).cuda()
    # NOTE(review): the optimizer is never stepped in this stage — it appears
    # to exist only so a resumed checkpoint's optimizer state can be loaded.
    optimizer = torch.optim.SGD(
        [{
            'params': model.module.conv_model.parameters(),
            'name': 'pre-trained'
        }, {
            'params': model.module.classification_stream.parameters(),
            'name': 'new-added'
        }, {
            'params': model.module.detection_stream.parameters(),
            'name': 'new-added'
        }],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    # always start from the best PartNet checkpoint produced by Process2
    log_partnet_model = args.dataset + '/PartNet/model_best.pth.tar'
    checkpoint = torch.load(log_partnet_model)
    model.load_state_dict(checkpoint['state_dict'])
    print('load the pre-trained partnet model from:', log_partnet_model)
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
            # clear the flag so later stages do not try to resume from it again
            args.resume = ''
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    else:
        if not os.path.isdir(log_now):
            os.makedirs(log_now)
        log = open(os.path.join(log_now, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()
    cudnn.benchmark = True
    train_loader, val_loader = generate_dataloader(args, process_name)
    # single pass over both splits; the val pass returns an accuracy figure
    for epoch in range(1):
        download_part_proposals(train_loader, model, epoch, log_now,
                                process_name, 'train', args)
        best_prec1 = download_part_proposals(val_loader, model, epoch, log_now,
                                             process_name, 'val', args)
    # mark the stage complete so re-runs skip it
    log = open(os.path.join(log_now, 'final.txt'), 'w')
    log.write("best acc %3f" % (best_prec1))
    log.close()
def main():
    """Entry point of the partial-domain-adaptation training script.

    Builds a ResNet backbone, the adversarial/entropy losses and a per-layer
    SGD optimizer, optionally resumes from a checkpoint, then alternates
    training iterations with periodic validation on the target domain.
    """
    global args, best_prec1
    args = opts()
    if args.arch.find('resnet') != -1:
        model = resnet(args)
    else:
        raise ValueError('Unavailable model architecture!!!')
    # define-multi GPU
    model = torch.nn.DataParallel(model).cuda()
    print(model)
    # define loss functions (criteria) and optimizer
    source_adv_loss = DiscAdvLossForSource_PartialDA().cuda()
    if args.disc_tar:
        target_adv_min_loss = DiscAdvLossForTarget_min(
            nClass=args.num_classes_s).cuda()
        target_adv_max_loss = DiscAdvLossForTarget_max(
            nClass=args.num_classes_s).cuda()
    else:
        target_adv_min_loss = AdvLossForTarget_min().cuda()
        target_adv_max_loss = AdvLossForTarget_max().cuda()
    target_em_loss = EMLossForTarget().cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    np.random.seed(1)  # fix the test data.
    random.seed(1)
    # BUGFIX: give best_prec1 a value on the non-resume path; otherwise the
    # 'prec1 > best_prec1' comparison in the loop below raises a NameError
    # (TODO confirm there is no module-level initialisation outside this file
    # chunk — setting it to 0 here is safe either way).
    best_prec1 = 0
    # apply different learning rates to different layers
    if args.arch.find('resnet') != -1:
        # index of the split point handed to train(); depends on backbone depth
        if args.arch.find('50') != -1:
            layer_index = 159
        elif args.arch.find('101') != -1:
            layer_index = 312
        elif args.arch.find('152') != -1:
            layer_index = 465
        else:
            raise ValueError('Undefined layer index!!!')
        optimizer = torch.optim.SGD(
            [{'params': model.module.conv1.parameters(), 'name': 'pre-trained'},
             {'params': model.module.bn1.parameters(), 'name': 'pre-trained'},
             {'params': model.module.layer1.parameters(), 'name': 'pre-trained'},
             {'params': model.module.layer2.parameters(), 'name': 'pre-trained'},
             {'params': model.module.layer3.parameters(), 'name': 'pre-trained'},
             {'params': model.module.layer4.parameters(), 'name': 'pre-trained'},
             {'params': model.module.fc.parameters(), 'name': 'pre-trained'}],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            nesterov=False)
    else:
        raise ValueError('Unavailable model architecture!!!')
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> Loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> Loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from is not existed',
                             args.resume)
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    # record the full option set at the top of the log
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()
    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    source_train_loader, target_train_loader, source_val_loader, target_val_loader = generate_dataloader(args)
    # test only
    if args.test_only:
        validate(target_val_loader, model, criterion, -1, args)
        return
    # start time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------')
    log.close()
    current_epoch = 0
    print('Begin training')
    # count an 'epoch' on whichever domain supplies more batches
    epoch_count_dataset = 'target'
    batch_number_t = len(target_train_loader)
    batch_number = batch_number_t
    batch_number_s = len(source_train_loader)
    if batch_number_s > batch_number_t:
        epoch_count_dataset = 'source'
        batch_number = batch_number_s
    if args.train_by_iter:
        num_iter_total = args.epochs
    else:
        num_iter_total = args.epochs * batch_number
    # BUGFIX: clamp to at least 1 so '(epoch + 1) % test_interval' cannot
    # divide by zero when args.test_time exceeds the total iteration count
    test_interval = max(int(num_iter_total / args.test_time), 1)
    source_train_loader_batch = enumerate(source_train_loader)
    target_train_loader_batch = enumerate(target_train_loader)
    class_weight = torch.cuda.FloatTensor(args.num_classes_s).fill_(1)
    for epoch in range(args.start_epoch, num_iter_total):
        # train for one iteration (the loop variable counts iterations;
        # current_epoch tracks completed passes over the larger dataset)
        source_train_loader_batch, target_train_loader_batch, current_epoch = train(
            source_train_loader, source_train_loader_batch,
            target_train_loader, target_train_loader_batch, model,
            source_adv_loss, target_adv_min_loss, target_adv_max_loss,
            target_em_loss, optimizer, test_interval, epoch, current_epoch,
            epoch_count_dataset, class_weight, layer_index, args)
        # evaluate on the val data
        if (epoch + 1) % test_interval == 0:
            prec1, class_weight = validate(target_val_loader, model, criterion,
                                           current_epoch, args)
            print('Class weight: ', class_weight)
            # record the best top-1 precision and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best:
                log = open(os.path.join(args.log, 'log.txt'), 'a')
                log.write('\nBest accuracy till now: %3f' % (best_prec1))
                log.close()
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args)
        # early stop
        if args.train_by_iter:
            this_loop = epoch
        else:
            this_loop = current_epoch
        if this_loop > args.stop_epoch:
            break
    print(' * best_prec1: %3f' % best_prec1)
    # best result and end time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n * best_prec1: %3f' % best_prec1)
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------\n')
    log.close()
def main():
    """Entry point of the CoGAN-style transfer-learning script.

    Builds the source/target networks (shared conv backbone, separate
    classifiers), optionally resumes, then trains while logging scalars to
    TensorBoard and checkpointing on each new best target accuracy.
    """
    args = opts()
    # fix the per-epoch shuffling so repeated runs evolve identically
    # (the same model is obtained at the same epoch)
    if args.seed != 666:
        if torch.cuda.is_available():
            torch.cuda.manual_seed(args.seed)
        else:
            torch.manual_seed(args.seed)
    else:
        if torch.cuda.is_available():
            torch.cuda.manual_seed(666)
        else:
            torch.manual_seed(666)
    model_source, model_target = CoGAN(args)
    # define-multi GPU
    model_source = torch.nn.DataParallel(model_source).cuda()
    model_target = torch.nn.DataParallel(model_target).cuda()
    print('the memory id should be same')
    print(id(model_source.module.resnet_conv))  # the memory is shared here
    print(id(model_target.module.resnet_conv))
    print('the memory id should be different')
    print(id(model_source.module.fc))  # the classifiers are separate objects
    print(id(model_target.module.fc))
    # define loss function(criterion) and optimizer
    if torch.cuda.is_available():
        criterion_d = nn.CrossEntropyLoss().cuda()
    else:
        criterion_d = nn.CrossEntropyLoss()
    best_par = 0
    # To apply different learning rate to different layer
    # NOTE(review): the optimizer settings below have not been revised yet.
    if args.pretrained:
        print('the pretrained setting of optimizer')
        if args.auxiliary_dataset == 'imagenet':
            optimizer = torch.optim.SGD(
                [{
                    'params': model_source.module.resnet_conv.parameters(),
                    'name': 'pre-trained'
                }, {
                    'params': model_source.module.fc.parameters(),
                    'name': 'pre-trained'
                }, {
                    'params': model_target.module.fc.parameters(),
                    'name': 'new-added'
                }],
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.weight_decay)
        elif args.auxiliary_dataset == 'l_bird':
            # unlike imagenet, the source classifier counts as newly added here
            optimizer = torch.optim.SGD(
                [{
                    'params': model_source.module.resnet_conv.parameters(),
                    'name': 'pre-trained'
                }, {
                    'params': model_source.module.fc.parameters(),
                    'name': 'new-added'
                }, {
                    'params': model_target.module.fc.parameters(),
                    'name': 'new-added'
                }],
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.weight_decay)
    else:
        print('the from scratch setting of optimizer')
        optimizer = torch.optim.SGD(
            [{
                'params': model_source.module.resnet_conv.parameters(),
                'name': 'new-added'
            }, {
                'params': model_source.module.fc.parameters(),
                'name': 'new-added'
            }, {
                'params': model_target.module.fc.parameters(),
                'name': 'new-added'
            }],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_par = checkpoint['best_par']
            model_source.load_state_dict(checkpoint['source_state_dict'])
            model_target.load_state_dict(checkpoint['target_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    else:
        if not os.path.isdir(args.log):
            os.makedirs(args.log)
        log = open(os.path.join(args.log, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()
    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    train_loader_source, train_loader_target = generate_dataloader(args)
    # test only
    if not args.train:
        # BUGFIX: 'epoch' was referenced here before the training loop defined
        # it (NameError); evaluate at the possibly-resumed starting epoch.
        par_test = test(model_source, model_target, criterion_d,
                        args.start_epoch, args)
        return
    print('begin training')
    train_loader_source_batch = enumerate(train_loader_source)
    train_loader_target_batch = enumerate(train_loader_target)
    writer = SummaryWriter(log_dir=args.log)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        # BUGFIX: the batch iterators were referenced under the misspelled
        # names train_loader_*_batches (undefined — NameError at runtime)
        par_train, loss = train(train_loader_source, train_loader_source_batch,
                                train_loader_target, train_loader_target_batch,
                                model_source, model_target, criterion_d,
                                optimizer, epoch, args)
        # BUGFIX: 'pred1_acc_train' was undefined; log what train() returned
        writer.add_scalars('data/scalar_group', {
            'par_train': par_train,
            'loss': loss
        }, epoch)
        # evaluate on the test data
        if (epoch + 1) % args.test_freq == 0:
            par_test = test(model_source, model_target, criterion_d, epoch,
                            args)
            writer.add_scalars('data/scalar_group', {'par_test': par_test},
                               epoch)
            is_best = par_test > best_par
            if is_best:
                best_par = par_test
                with open(os.path.join(args.log, 'log.txt'), 'a') as fp:
                    fp.write(' \nTarget_T1 acc: %3f' % (best_par))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    # BUGFIX: 'model' was undefined here; save both networks
                    # under the keys the resume branch above reads back
                    'source_state_dict': model_source.state_dict(),
                    'target_state_dict': model_target.state_dict(),
                    'best_par': best_par,
                    'optimizer': optimizer.state_dict()
                }, is_best, args, epoch + 1)
    writer.close()
def main():
    """Entry point of the domain-classifier training script.

    Builds the source model plus two SGD optimizers (feature and domain
    steps), optionally resumes, then trains until current_epoch passes
    args.epochs, validating on the target domain whenever an epoch completes.
    """
    global args, best_prec1, current_epoch, epoch_count_dataset
    current_epoch = 0
    epoch_count_dataset = 'source'
    args = opts()
    model_source = Model_Construct(args)
    # define-multi GPU
    model_source = torch.nn.DataParallel(model_source).cuda()
    # define loss function(criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_bce = nn.BCEWithLogitsLoss().cuda()
    np.random.seed(1)  # fix the test data.
    random.seed(1)
    # BUGFIX: give best_prec1 a value on the non-resume path; otherwise the
    # 'prec1 > best_prec1' comparison below raises a NameError (TODO confirm
    # there is no module-level initialisation outside this chunk — 0 is safe
    # either way, as resuming overwrites it from the checkpoint).
    best_prec1 = 0

    def _build_optimizer():
        # Fresh param-group dicts per optimizer: torch.optim mutates the group
        # dicts in place, so the two optimizers must not share them.
        return torch.optim.SGD(
            [{
                'params': model_source.module.base_conv.parameters(),
                'name': 'conv'
            }, {
                'params': model_source.module.domain_classifier.parameters(),
                'name': 'do_cl'
            }, {
                'params': model_source.module.fc.parameters(),
                'name': 'ca_cl'
            }],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)

    # the original duplicated identical optimizer definitions in both branches;
    # only the announcement differs, so the construction is shared above
    if args.domain_feature == 'original':
        print('domain feature is original')
    elif args.domain_feature == 'full_bilinear' or args.domain_feature == 'random_bilinear':
        print('the domain feature is full bilinear')
    else:
        raise ValueError('the requested domain feature is not available',
                         args.domain_feature)
    optimizer_feature = _build_optimizer()
    optimizer_domain = _build_optimizer()
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            current_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model_source.load_state_dict(checkpoint['source_state_dict'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from is not exited',
                             args.resume)
    else:
        if not os.path.isdir(args.log):
            os.makedirs(args.log)
        log = open(os.path.join(args.log, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()
    # stamp the (re)start time into the log
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    local_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    log.write(local_time)
    log.close()
    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    train_loader_source, train_loader_target, val_loader_target, val_loader_source = generate_dataloader(
        args)
    print('begin training')
    train_loader_source_batch = enumerate(train_loader_source)
    train_loader_target_batch = enumerate(train_loader_target)
    # count an 'epoch' on whichever domain supplies more batches
    batch_number_s = len(train_loader_source)
    batch_number_t = len(train_loader_target)
    if batch_number_s < batch_number_t:
        epoch_count_dataset = 'target'
    # effectively 'while True': the huge upper bound is never reached because
    # the loop breaks once current_epoch passes args.epochs
    for epoch in range(args.start_epoch, 1000000000000000000):
        # train for one iteration; train() refreshes the exhausted iterators
        # and reports when a full epoch of the counted dataset has elapsed
        train_loader_source_batch, train_loader_target_batch, current_epoch, new_epoch_flag = train(
            train_loader_source, train_loader_source_batch,
            train_loader_target, train_loader_target_batch, model_source,
            criterion, criterion_bce, optimizer_feature, optimizer_domain,
            epoch, args, current_epoch, epoch_count_dataset)
        # evaluate on the val data once per completed epoch
        if new_epoch_flag:
            prec1 = validate(val_loader_target, model_source, criterion,
                             current_epoch, args)
            # record the best prec1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best:
                log = open(os.path.join(args.log, 'log.txt'), 'a')
                log.write(' Target_T1 acc: %3f' % (best_prec1))
                log.close()
            save_checkpoint(
                {
                    'epoch': current_epoch + 1,
                    'arch': args.arch,
                    'source_state_dict': model_source.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, args)
            if (current_epoch + 1) % args.domain_freq == 0:
                download_domain_scores(val_loader_target, val_loader_source,
                                       model_source, criterion, current_epoch,
                                       args)
            if current_epoch > args.epochs:
                break