def load_weight_file_for_test(model, params):
    """Choose the checkpoint for testing and load its backbone weights into ``model``.

    If ``params.loadfile`` is set it overrides the checkpoint-directory lookup;
    otherwise the file is resolved from ``params.checkpoint_dir`` via
    ``save_iter`` / method-specific helpers.

    Returns the model moved to CUDA and switched to eval mode.
    Raises AssertionError if no weight file can be resolved.
    """
    if params.loadfile != '':
        # Explicit weight file wins over directory-based lookup.
        modelfile = params.loadfile
        checkpoint_dir = params.loadfile
    else:
        checkpoint_dir = params.checkpoint_dir  # checkpoint path
        if params.save_iter != -1:
            modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
        elif params.method in ['baseline', 'baseline++']:
            modelfile = get_resume_file(checkpoint_dir)
        else:
            modelfile = get_best_file(checkpoint_dir)  # return the best.tar file
    assert modelfile, "can not find model weight file in {}".format(checkpoint_dir)
    print("use model weight file: ", modelfile)

    # FIX: the original had two byte-identical branches for maml vs. other
    # methods (both stripped the 'feature.' prefix and loaded the very same
    # dict object, since `state` aliases `tmp['state']`), and called
    # model.eval() twice. The duplication is collapsed into one path.
    # Checkpoints store the backbone under 'feature.*'; rename those keys so
    # they match model.feature, and drop every non-backbone entry.
    tmp = torch.load(modelfile)
    state = tmp['state']
    for key in list(state.keys()):
        if "feature." in key:
            # cast name from 'feature.trunk.xx' to 'trunk.xx'
            state[key.replace("feature.", "")] = state.pop(key)
        else:
            state.pop(key)
    model.feature.load_state_dict(state)

    model = model.cuda()
    model.eval()
    return model
def baseline_s2m2_init(self, params):
    """Build and load the pretrained baseline/baseline++/S2M2_R model for this runner.

    Resolves the checkpoint directory from ``configs.save_dir`` and the run
    options, constructs the matching network, loads its weights, and stores the
    model plus feature/image/batch sizes on ``self``.

    Raises ValueError for any other ``params.method`` (previously this fell
    through and crashed later with UnboundLocalError on ``model``).
    """
    checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
        configs.save_dir, self.base_dataset, params.model, params.method)
    if params.train_aug:
        checkpoint_dir += '_aug'
    if params.method not in ['baseline', 'baseline++', 'S2M2_R']:
        checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, params.n_shot)

    if params.save_iter != -1:
        modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
    else:
        modelfile = get_best_file(checkpoint_dir)

    self.num_classes = 200  # assumes a 200-class pretraining set (e.g. CUB) -- TODO confirm
    if params.method == 'baseline++':
        model = BaselineTrain(model_dict[params.model],
                              self.num_classes,
                              loss_type='dist')
    elif params.method == 'baseline':
        model = BaselineTrain(model_dict[params.model],
                              self.num_classes,
                              loss_type='softmax')
    elif params.method == 'S2M2_R':
        model = wrn28_10(200)
    else:
        # FIX: fail fast instead of leaving `model` unbound.
        raise ValueError('Unsupported method for baseline_s2m2_init: %s' % params.method)
    model = model.cuda()

    if params.method == 'S2M2_R':
        tmp = torch.load(modelfile)
        state = tmp['state']
        # Update feature extractor: merge checkpoint keys into the current
        # state dict so missing keys keep their initialized values.
        model_dict_load = model.state_dict()
        model_dict_load.update(state)
        model.load_state_dict(model_dict_load)
        # Update classifier head from the separately-saved 'classifier' entry.
        cls_dict = model.linear.state_dict()
        cls_dict.update(tmp['classifier'])
        model.linear.load_state_dict(cls_dict)
    else:
        tmp = torch.load(modelfile)
        state = tmp['state']
        model.load_state_dict(state)
    model.eval()

    self.model = model
    # NOTE(review): this reads self.method while the loading above used
    # params.method -- presumably kept in sync by the caller; verify.
    if self.method == 'S2M2_R':
        self.feat_dim = 640
        self.image_size = 80
        self.batch_size = 16
    else:
        self.feat_dim = 512
        self.image_size = 224
        self.batch_size = 64
params.prepared_task, params.selection_epoch, params.total_image_num - params.val_image_num) if params.dataset == "random_selected_data": checkpoint_dir += "_{0}train_image".format(params.total_image_num - params.val_image_num) if params.dataset == "same_num_data": checkpoint_dir += "_{0}class_{1}image".format( params.used_class_num, params.every_class_image_num) if not params.method in ['baseline', 'baseline++']: try: checkpoint_dir2 = checkpoint_dir + '_%dway_%dshot' % ( params.train_n_way, params.n_shot) torch.load(get_best_file(checkpoint_dir2)) checkpoint_dir = checkpoint_dir2 except: checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, 5) #modelfile = get_resume_file(checkpoint_dir) if not params.method in ['baseline', 'baseline++']: if params.save_iter != -1: modelfile = get_assigned_file(checkpoint_dir, params.save_iter) else: modelfile = get_best_file(checkpoint_dir) if modelfile is not None: tmp = torch.load(modelfile) model.load_state_dict(tmp['state'])
def meta_test(novel_loader,
              n_query=15,
              pretrained_dataset='miniImageNet',
              freeze_backbone=False,
              n_pseudo=100,
              n_way=5,
              n_support=5):
    """Episodic meta-test: reload a pretrained ProtoNet per task, optionally
    finetune it on pseudo-queries generated from the support set, then score
    the real query set and report per-task and aggregate accuracy.

    Reads the module-level ``params`` for method/model/checkpoint options.
    Note: ``n_query`` is overwritten per episode from the task tensor shape.
    """
    correct = 0
    count = 0
    iter_num = len(novel_loader)
    acc_all = []
    for ti, (x, y) in enumerate(novel_loader):
        ###############################################################################################
        # Load the pretrained model on miniImageNet (reloaded fresh for every
        # task so per-task finetuning never leaks into the next episode).
        if params.method == 'protonet':
            pretrained_model = ProtoNet(model_dict[params.model],
                                        n_way=n_way,
                                        n_support=n_support)
            checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
                configs.save_dir, pretrained_dataset, params.model,
                params.method)
            if params.train_aug:
                checkpoint_dir += '_aug'
            checkpoint_dir += '_5way_5shot'
            # NOTE(review): save_iter is forced to -1 here, so the branch
            # below always falls through to get_best_file.
            params.save_iter = -1
            if params.save_iter != -1:
                modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
            else:
                modelfile = get_best_file(checkpoint_dir)
            tmp = torch.load(modelfile)
            state = tmp['state']
            pretrained_model.load_state_dict(state)
            pretrained_model.cuda()
        ###############################################################################################
        # Split the episode tensor into support and query sets.
        # assumes x is (n_way, n_support + n_query, C, H, W) -- TODO confirm
        n_query = x.size(1) - n_support
        x = x.cuda()
        x_var = Variable(x)
        support_size = n_way * n_support
        y_a_i = Variable(torch.from_numpy(np.repeat(
            range(n_way), n_support))).cuda()  # (25,)
        x_b_i = x_var[:, n_support:, :, :, :].contiguous().view(
            n_way * n_query, *x.size()[2:])  # query set
        x_a_i = x_var[:, :n_support, :, :, :].contiguous().view(
            n_way * n_support, *x.size()[2:])  # support set
        if freeze_backbone == False:
            ###############################################################################################
            # Finetune components initialization: pseudo-query generator plus
            # an Adam optimizer over all trainable model parameters.
            pseudo_q_genrator = PseudoQeuryGenerator(n_way, n_support,
                                                     n_pseudo)
            delta_opt = torch.optim.Adam(
                filter(lambda p: p.requires_grad,
                       pretrained_model.parameters()))
            ###############################################################################################
            # Finetune process: train on (support, pseudo-query) episodes.
            finetune_epoch = 100
            fine_tune_n_query = n_pseudo // n_way
            pretrained_model.n_query = fine_tune_n_query
            pretrained_model.train()
            z_support = x_a_i.view(n_way, n_support, *x_a_i.size()[1:])
            for epoch in range(finetune_epoch):
                delta_opt.zero_grad()
                # generate pseudo query images from the support examples
                psedo_query_set, _ = pseudo_q_genrator.generate(x_a_i)
                psedo_query_set = psedo_query_set.cuda().view(
                    n_way, fine_tune_n_query, *x_a_i.size()[1:])
                # NOTE: rebinding x here shadows the episode tensor; safe only
                # because x is not used again after this point in the loop.
                x = torch.cat((z_support, psedo_query_set), dim=1)
                loss = pretrained_model.set_forward_loss(x)
                loss.backward()
                delta_opt.step()
        ###############################################################################################
        # Inference on the real query set.
        pretrained_model.eval()
        pretrained_model.n_query = n_query
        with torch.no_grad():
            scores = pretrained_model.set_forward(x_var.cuda())
        y_query = np.repeat(range(n_way), n_query)
        topk_scores, topk_labels = scores.data.topk(1, 1, True, True)
        topk_ind = topk_labels.cpu().numpy()
        top1_correct = np.sum(topk_ind[:, 0] == y_query)
        correct_this, count_this = float(top1_correct), len(y_query)
        acc_all.append((correct_this / count_this * 100))
        print("Task %d : %4.2f%% Now avg: %4.2f%%" %
              (ti, correct_this / count_this * 100, np.mean(acc_all)))
    ###############################################################################################
    # Aggregate accuracy with a 95% confidence interval.
    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    print('%d Test Acc = %4.2f%% +- %4.2f%%' %
          (iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num)))
def single_test(params):
    """Run one full evaluation pass for the configured few-shot method.

    Builds the model for ``params.method``, loads its best/assigned
    checkpoint, evaluates either episodically (maml/DKT, which cannot test
    from saved features) or from precomputed feature files, appends the
    result line to ./record/results.txt, and returns the mean accuracy.
    """
    acc_all = []
    iter_num = 600
    few_shot_params = dict(n_way = params.test_n_way , n_support = params.n_shot)
    if params.dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug ,'omniglot only support Conv4 without augmentation'
        # Omniglot uses the 1-channel Conv4 variant; mutates params in place.
        params.model = 'Conv4S'
    # Method dispatch: construct the matching meta-learning model.
    if params.method == 'baseline':
        model = BaselineFinetune( model_dict[params.model], **few_shot_params )
    elif params.method == 'baseline++':
        model = BaselineFinetune( model_dict[params.model], loss_type = 'dist', **few_shot_params )
    elif params.method == 'protonet':
        model = ProtoNet( model_dict[params.model], **few_shot_params )
    elif params.method == 'DKT':
        model = DKT(model_dict[params.model], **few_shot_params)
    elif params.method == 'matchingnet':
        model = MatchingNet( model_dict[params.model], **few_shot_params )
    elif params.method in ['relationnet', 'relationnet_softmax']:
        # RelationNet needs non-flattened (spatial) feature maps.
        if params.model == 'Conv4':
            feature_model = backbone.Conv4NP
        elif params.model == 'Conv6':
            feature_model = backbone.Conv6NP
        elif params.model == 'Conv4S':
            feature_model = backbone.Conv4SNP
        else:
            feature_model = lambda: model_dict[params.model]( flatten = False )
        loss_type = 'mse' if params.method == 'relationnet' else 'softmax'
        model = RelationNet( feature_model, loss_type = loss_type , **few_shot_params )
    elif params.method in ['maml' , 'maml_approx']:
        # Switch the backbone blocks into their MAML-compatible variants.
        backbone.ConvBlock.maml = True
        backbone.SimpleBlock.maml = True
        backbone.BottleneckBlock.maml = True
        backbone.ResNet.maml = True
        model = MAML( model_dict[params.model], approx = (params.method == 'maml_approx') , **few_shot_params )
        if params.dataset in ['omniglot', 'cross_char']: #maml use different parameter in omniglot
            model.n_task = 32
            model.task_update_num = 1
            model.train_lr = 0.1
    else:
        raise ValueError('Unknown method')
    model = model.cuda()

    # Resolve the checkpoint directory from the run configuration.
    checkpoint_dir = '%s/checkpoints/%s/%s_%s' %(configs.save_dir, params.dataset, params.model, params.method)
    if params.train_aug:
        checkpoint_dir += '_aug'
    if not params.method in ['baseline', 'baseline++'] :
        checkpoint_dir += '_%dway_%dshot' %( params.train_n_way, params.n_shot)
    #modelfile = get_resume_file(checkpoint_dir)

    # Baselines evaluate from saved features only, so no weights are loaded here.
    if not params.method in ['baseline', 'baseline++'] :
        if params.save_iter != -1:
            modelfile = get_assigned_file(checkpoint_dir,params.save_iter)
        else:
            modelfile = get_best_file(checkpoint_dir)
        if modelfile is not None:
            tmp = torch.load(modelfile)
            model.load_state_dict(tmp['state'])
        else:
            print("[WARNING] Cannot find 'best_file.tar' in: " + str(checkpoint_dir))

    split = params.split
    if params.save_iter != -1:
        split_str = split + "_" +str(params.save_iter)
    else:
        split_str = split

    if params.method in ['maml', 'maml_approx', 'DKT']: #maml do not support testing with feature
        # Episodic evaluation straight from images.
        if 'Conv' in params.model:
            if params.dataset in ['omniglot', 'cross_char']:
                image_size = 28
            else:
                image_size = 84
        else:
            image_size = 224
        datamgr = SetDataManager(image_size, n_eposide = iter_num, n_query = 15 , **few_shot_params)
        if params.dataset == 'cross':
            if split == 'base':
                loadfile = configs.data_dir['miniImagenet'] + 'all.json'
            else:
                loadfile = configs.data_dir['CUB'] + split +'.json'
        elif params.dataset == 'cross_char':
            if split == 'base':
                loadfile = configs.data_dir['omniglot'] + 'noLatin.json'
            else:
                loadfile = configs.data_dir['emnist'] + split +'.json'
        else:
            loadfile = configs.data_dir[params.dataset] + split + '.json'
        novel_loader = datamgr.get_data_loader( loadfile, aug = False)
        if params.adaptation:
            model.task_update_num = 100 #We perform adaptation on MAML simply by updating more times.
        model.eval()
        acc_mean, acc_std = model.test_loop( novel_loader, return_std = True)
    else:
        # Feature-based evaluation from a precomputed .hdf5 feature file.
        novel_file = os.path.join( checkpoint_dir.replace("checkpoints","features"), split_str +".hdf5") #defaut split = novel, but you can also test base or val classes
        cl_data_file = feat_loader.init_loader(novel_file)
        for i in range(iter_num):
            acc = feature_evaluation(cl_data_file, model, n_query = 15, adaptation = params.adaptation, **few_shot_params)
            acc_all.append(acc)
        acc_all = np.asarray(acc_all)
        acc_mean = np.mean(acc_all)
        acc_std = np.std(acc_all)
        print('%d Test Acc = %4.2f%% +- %4.2f%%' %(iter_num, acc_mean, 1.96* acc_std/np.sqrt(iter_num)))

    # Append a timestamped result line to the shared results log.
    with open('./record/results.txt' , 'a') as f:
        timestamp = time.strftime("%Y%m%d-%H%M%S", time.localtime())
        aug_str = '-aug' if params.train_aug else ''
        aug_str += '-adapted' if params.adaptation else ''
        if params.method in ['baseline', 'baseline++'] :
            exp_setting = '%s-%s-%s-%s%s %sshot %sway_test' %(params.dataset, split_str, params.model, params.method, aug_str, params.n_shot, params.test_n_way )
        else:
            exp_setting = '%s-%s-%s-%s%s %sshot %sway_train %sway_test' %(params.dataset, split_str, params.model, params.method, aug_str , params.n_shot , params.train_n_way, params.test_n_way )
        acc_str = '%d Test Acc = %4.2f%% +- %4.2f%%' %(iter_num, acc_mean, 1.96* acc_std/np.sqrt(iter_num))
        f.write( 'Time: %s, Setting: %s, Acc: %s \n' %(timestamp,exp_setting,acc_str) )
    return acc_mean
n_support=args.n_shot, # Language options lsl=args.lsl, language_model=lang_model, lang_supervision=args.lang_supervision, l3=args.l3, l3_model=l3_model, l3_n_infer=args.l3_n_infer, ) model = model.cuda() if args.save_iter != -1: modelfile = get_assigned_file(args.checkpoint_dir, args.save_iter) else: modelfile = get_best_file(args.checkpoint_dir) if modelfile is not None: tmp = torch.load(modelfile) model.load_state_dict( tmp["state"], # If language was used for pretraining, ignore # the language model component here. If we want to use language, # make sure the model is loaded strict=args.lsl, ) if args.save_embeddings: if args.lsl: weights = model.language_model.embedding.weight.detach().cpu().numpy() vocab_srt = sorted(list(vocab.items()), key=lambda x: x[1])
def finetune(novel_loader, n_query = 15, pretrained_dataset='miniImageNet', freeze_backbone = False, n_way = 5, n_support = 5):
    """Per-task transfer learning: for each novel episode, reload a pretrained
    backbone, train a fresh linear classifier on the support set (optionally
    also finetuning the backbone), then score the query set.

    Reads the module-level ``params`` for method/model/checkpoint options.
    Note: ``n_query`` is overwritten per episode from the task tensor shape.
    """
    correct = 0
    count = 0
    iter_num = len(novel_loader)
    acc_all = []
    for _, (x, y) in enumerate(novel_loader):
        ###############################################################################################
        # Load the pretrained backbone (fresh per task so finetuning never
        # leaks between episodes).
        pretrained_model = model_dict[params.model]()
        checkpoint_dir = '%s/checkpoints/%s/%s_%s' %(configs.save_dir, pretrained_dataset, params.model, params.method)
        if params.train_aug:
            checkpoint_dir += '_aug'
        # NOTE(review): save_iter is forced to -1, so get_assigned_file below
        # is unreachable; the method branch decides resume vs. best file.
        params.save_iter = -1
        if params.save_iter != -1:
            modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
        elif params.method in ['baseline', 'baseline++'] :
            modelfile = get_resume_file(checkpoint_dir)
        else:
            modelfile = get_best_file(checkpoint_dir)
        tmp = torch.load(modelfile)
        state = tmp['state']
        state_keys = list(state.keys())
        for _, key in enumerate(state_keys):
            if "feature." in key:
                newkey = key.replace("feature.","") # an architecture model has attribute 'feature', load architecture feature to backbone by casting name from 'feature.trunk.xx' to 'trunk.xx'
                state[newkey] = state.pop(key)
            else:
                state.pop(key)
        pretrained_model.load_state_dict(state)
        ###############################################################################################
        # Fresh linear head sized to the backbone's output dimension.
        classifier = Classifier(pretrained_model.final_feat_dim, n_way)
        ###############################################################################################
        # Split the episode into support/query sets.
        # assumes x is (n_way, n_support + n_query, C, H, W) -- TODO confirm
        n_query = x.size(1) - n_support
        x = x.cuda()
        x_var = Variable(x)
        batch_size = 4
        support_size = n_way * n_support
        y_a_i = Variable( torch.from_numpy( np.repeat(range( n_way ), n_support ) )).cuda() # (25,)
        x_b_i = x_var[:, n_support:,:,:,:].contiguous().view( n_way* n_query, *x.size()[2:])
        x_a_i = x_var[:,:n_support,:,:,:].contiguous().view( n_way* n_support, *x.size()[2:]) # (25, 3, 224, 224)
        ###############################################################################################
        # Optimizers: classifier always trains; backbone only if not frozen.
        loss_fn = nn.CrossEntropyLoss().cuda()
        classifier_opt = torch.optim.SGD(classifier.parameters(), lr = 0.01, momentum=0.9, dampening=0.9, weight_decay=0.001)
        if freeze_backbone is False:
            delta_opt = torch.optim.SGD(filter(lambda p: p.requires_grad, pretrained_model.parameters()), lr = 0.01)
        pretrained_model.cuda()
        classifier.cuda()
        ###############################################################################################
        # Finetune on the support set in shuffled mini-batches.
        total_epoch = 100
        if freeze_backbone is False:
            pretrained_model.train()
        else:
            pretrained_model.eval()
        classifier.train()
        for epoch in range(total_epoch):
            rand_id = np.random.permutation(support_size)
            for j in range(0, support_size, batch_size):
                classifier_opt.zero_grad()
                if freeze_backbone is False:
                    delta_opt.zero_grad()
                #####################################
                selected_id = torch.from_numpy( rand_id[j: min(j+batch_size, support_size)]).cuda()
                z_batch = x_a_i[selected_id]
                y_batch = y_a_i[selected_id]
                #####################################
                output = pretrained_model(z_batch)
                output = classifier(output)
                loss = loss_fn(output, y_batch)
                #####################################
                loss.backward()
                classifier_opt.step()
                if freeze_backbone is False:
                    delta_opt.step()
        # Evaluate the finetuned model on the query set.
        pretrained_model.eval()
        classifier.eval()
        output = pretrained_model(x_b_i.cuda())
        scores = classifier(output)
        y_query = np.repeat(range( n_way ), n_query )
        topk_scores, topk_labels = scores.data.topk(1, 1, True, True)
        topk_ind = topk_labels.cpu().numpy()
        top1_correct = np.sum(topk_ind[:,0] == y_query)
        correct_this, count_this = float(top1_correct), len(y_query)
        print (correct_this/ count_this *100)
        acc_all.append((correct_this/ count_this *100))
    ###############################################################################################
    # Aggregate accuracy with a 95% confidence interval.
    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    print('%d Test Acc = %4.2f%% +- %4.2f%%' %(iter_num, acc_mean, 1.96* acc_std/np.sqrt(iter_num)))
def baseline_s2m2_init(params):
    """Prepare a model, data loader, and output path for feature extraction.

    Resolves the dataset split file and checkpoint directory, builds the
    backbone for ``params.method``, loads its weights (with S2M2_R-specific
    DataParallel handling), and returns ``(model, data_loader, outfile,
    params)`` where ``outfile`` is the .hdf5 feature path to write.

    Raises ValueError for maml methods (cannot save features).
    """
    assert params.method != 'maml' and params.method != 'maml_approx', 'maml do not support save_feature and run'
    # Image size depends on the backbone family and method.
    if 'Conv' in params.model:
        if params.dataset in ['omniglot', 'cross_char']:
            image_size = 28
        else:
            image_size = 84
    else:
        if params.method == "S2M2_R":
            image_size = 80
        else:
            image_size = 224
    if params.dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug, 'omniglot only support Conv4 without augmentation'
        # Omniglot uses the 1-channel Conv4 variant; mutates params in place.
        params.model = 'Conv4S'
    # Resolve the split .json listing for the requested dataset.
    split = params.split
    if params.dataset == 'cross':
        if split == 'base':
            loadfile = configs.data_dir['miniImagenet'] + 'all.json'
        else:
            loadfile = configs.data_dir['CUB'] + split + '.json'
    elif params.dataset == 'cross_char':
        if split == 'base':
            loadfile = configs.data_dir['omniglot'] + 'noLatin.json'
        else:
            loadfile = configs.data_dir['emnist'] + split + '.json'
    else:
        loadfile = configs.data_dir[params.dataset] + split + '.json'
    ###### Temp !!!!!!!!!!!!!!!!!
    # For 'cross', checkpoints live under the miniImagenet name.
    if params.dataset == "cross":
        dataset = "miniImagenet"
    else:
        dataset = params.dataset
    ###### Temp !!!!!!!!!!!!!!!!!
    checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (configs.save_dir, dataset,
                                                  params.model, params.method)
    if params.train_aug:
        checkpoint_dir += '_aug'
    if not params.method in ['baseline', 'baseline++', 'S2M2_R']:
        checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, params.n_shot)
    if params.save_iter != -1:
        modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
    # elif params.method in ['baseline', 'baseline++'] :
    #     modelfile = get_resume_file(checkpoint_dir) #comment in 2019/08/03 updates as the validation of baseline/baseline++ is added
    else:
        modelfile = get_best_file(checkpoint_dir)
    # Feature file path mirrors the checkpoint path under 'features/'.
    if params.save_iter != -1:
        outfile = os.path.join(
            checkpoint_dir.replace("checkpoints", "features"),
            split + "_" + str(params.save_iter) + ".hdf5")
    else:
        outfile = os.path.join(
            checkpoint_dir.replace("checkpoints", "features"),
            split + ".hdf5")
    if params.dataset == "cross":
        outfile = outfile.replace("miniImagenet", "cross")
    ###### Temp !!!!!!!!!!!!!!!!!
    # outfile = outfile.replace("miniImagenet", "cross")
    ###### Temp !!!!!!!!!!!!!!!!!
    datamgr = SimpleDataManager(image_size, batch_size=64)
    data_loader = datamgr.get_data_loader(loadfile, aug=False, num_workers=12)
    # Build the backbone matching the method.
    if params.method in ['relationnet', 'relationnet_softmax']:
        # RelationNet needs non-flattened (spatial) feature maps.
        if params.model == 'Conv4':
            model = backbone.Conv4NP()
        elif params.model == 'Conv6':
            model = backbone.Conv6NP()
        elif params.model == 'Conv4S':
            model = backbone.Conv4SNP()
        else:
            model = model_dict[params.model](flatten=False)
    elif params.method in ['maml', 'maml_approx']:
        raise ValueError('MAML do not support save feature')
    elif params.method == "S2M2_R":
        model = wrn28_10(200)
    else:
        model = model_dict[params.model]()
    print("Using %s" % modelfile)
    model = model.cuda()
    tmp = torch.load(modelfile)
    state = tmp['state']
    state_keys = list(state.keys())
    if params.method == "S2M2_R":
        # Checkpoints saved from DataParallel have 'module.'-prefixed keys;
        # wrap the model so the names line up.
        callwrap = False
        if 'module' in state_keys[0]:
            callwrap = True
        if callwrap:
            model = WrappedModel(model)
        model_dict_load = model.state_dict()
        model_dict_load.update(state)
        model.load_state_dict(model_dict_load)
    else:
        for i, key in enumerate(state_keys):
            if "feature." in key:
                newkey = key.replace(
                    "feature.", ""
                )  # an architecture model has attribute 'feature', load architecture feature to backbone by casting name from 'feature.trunk.xx' to 'trunk.xx'
                state[newkey] = state.pop(key)
            else:
                state.pop(key)
        model.load_state_dict(state)
    model.eval()
    # Make sure the output directory for the feature file exists.
    dirname = os.path.dirname(outfile)
    if not os.path.isdir(dirname):
        os.makedirs(dirname)
    return model, data_loader, outfile, params
def test_loop(novel_loader,
              return_std=False,
              loss_type="softmax",
              n_query=15,
              models_to_use=[],
              finetune_each_model=False,
              n_way=5,
              n_support=5):  #overwrite parrent function
    """Multi-backbone ensemble evaluation over novel episodes.

    For each task: load one pretrained backbone per dataset in
    ``models_to_use``, optionally finetune each on the support set (masking
    out the smaller half of each gradient tensor), collect intermediate-layer
    embeddings from every backbone, select the best layer/model combination
    via ``train_selection``, train a linear classifier on the concatenated
    selected embeddings, and score the query set.

    NOTE(review): ``models_to_use=[]`` is a mutable default argument; it is
    only read here, but should be ``None``-sentineled if ever mutated.
    Reads the module-level ``params`` for method/model/checkpoint options.
    """
    correct = 0
    count = 0
    iter_num = len(novel_loader)
    acc_all = []
    for _, (x, y) in enumerate(novel_loader):
        ###############################################################################################
        # One fresh backbone instance per source dataset.
        pretrained_models = []
        for _ in range(len(models_to_use)):
            pretrained_models.append(model_dict[params.model]())
        ###############################################################################################
        # Load each backbone's checkpoint, stripping the 'feature.' prefix.
        for idx, dataset_name in enumerate(models_to_use):
            checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
                configs.save_dir, models_to_use[idx], params.model,
                params.method)
            if params.train_aug:
                checkpoint_dir += '_aug'
            # NOTE(review): save_iter is forced to -1, so get_assigned_file
            # below is unreachable.
            params.save_iter = -1
            if params.save_iter != -1:
                modelfile = get_assigned_file(checkpoint_dir,
                                              params.save_iter)
            elif params.method in ['baseline', 'baseline++']:
                modelfile = get_resume_file(checkpoint_dir)
            else:
                modelfile = get_best_file(checkpoint_dir)
            tmp = torch.load(modelfile)
            state = tmp['state']
            state_keys = list(state.keys())
            for _, key in enumerate(state_keys):
                if "feature." in key:
                    newkey = key.replace(
                        "feature.", ""
                    )  # an architecture model has attribute 'feature', load architecture feature to backbone by casting name from 'feature.trunk.xx' to 'trunk.xx'
                    state[newkey] = state.pop(key)
                else:
                    state.pop(key)
            pretrained_models[idx].load_state_dict(state)
        ###############################################################################################
        # assumes x is (n_way, n_support + n_query, C, H, W) -- TODO confirm
        n_query = x.size(1) - n_support
        x = x.cuda()
        x_var = Variable(x)
        batch_size = 4
        support_size = n_way * n_support
        ##################################################################################
        if finetune_each_model:
            # Finetune every backbone on the support set; gradients whose
            # magnitude is below the per-tensor median are zeroed before the
            # optimizer step.
            for idx, model_name in enumerate(pretrained_models):
                pretrained_models[idx].cuda()
                pretrained_models[idx].train()
                x_a_i = x_var[:, :n_support, :, :, :].contiguous().view(
                    n_way * n_support, *x.size()[2:])  # (25, 3, 224, 224)
                loss_fn = nn.CrossEntropyLoss().cuda()
                cnet = Classifier(pretrained_models[idx].final_feat_dim,
                                  n_way).cuda()
                classifier_opt = torch.optim.SGD(cnet.parameters(),
                                                 lr=0.01,
                                                 momentum=0.9,
                                                 dampening=0.9,
                                                 weight_decay=0.001)
                feature_opt = torch.optim.SGD(
                    pretrained_models[idx].parameters(),
                    lr=0.01,
                    momentum=0.9,
                    dampening=0.9,
                    weight_decay=0.001)
                x_a_i = Variable(x_a_i).cuda()
                y_a_i = Variable(
                    torch.from_numpy(np.repeat(range(n_way),
                                               n_support))).cuda()  # (25,)
                train_size = support_size
                batch_size = 4
                for epoch in range(100):
                    rand_id = np.random.permutation(train_size)
                    for j in range(0, train_size, batch_size):
                        classifier_opt.zero_grad()
                        feature_opt.zero_grad()
                        #####################################
                        selected_id = torch.from_numpy(
                            rand_id[j:min(j + batch_size,
                                          train_size)]).cuda()
                        z_batch = x_a_i[selected_id]
                        y_batch = y_a_i[selected_id]
                        #####################################
                        outputs = pretrained_models[idx](z_batch)
                        outputs = cnet(outputs)
                        #####################################
                        loss = loss_fn(outputs, y_batch)
                        loss.backward()
                        # Zero out below-median-magnitude gradient entries.
                        for k, param in enumerate(
                                pretrained_models[idx].parameters()):
                            param.grad[torch.lt(
                                torch.abs(param.grad),
                                torch.abs(param.grad).median())] = 0.0
                        classifier_opt.step()
                        feature_opt.step()
        ###############################################################################################
        for idx, model_name in enumerate(pretrained_models):
            pretrained_models[idx].cuda()
            pretrained_models[idx].eval()
        ###############################################################################################
        # Collect per-layer support embeddings from every backbone.
        # NOTE(review): the inner loop reuses the name `idx`, shadowing the
        # outer model index; `pretrained_models[idx].trunk` is evaluated
        # before the shadowing starts, so this works but is fragile.
        all_embeddings_train = []
        for idx, model_name in enumerate(pretrained_models):
            model_embeddings = []
            x_a_i = x_var[:, :n_support, :, :, :].contiguous().view(
                n_way * n_support, *x.size()[2:])  # (25, 3, 224, 224)
            for idx, module in enumerate(pretrained_models[idx].trunk):
                x_a_i = module(x_a_i)
                if len(list(x_a_i.size())) == 4:
                    embedding = F.adaptive_avg_pool2d(x_a_i,
                                                      (1, 1)).squeeze()
                    model_embeddings.append(embedding.detach())
            # Keep only mid-level layers for ResNets; all layers for Conv4.
            if params.model == "ResNet10" or params.model == "ResNet18":
                model_embeddings = model_embeddings[4:-1]
            elif params.model == "Conv4":
                model_embeddings = model_embeddings
            all_embeddings_train.append(model_embeddings)
        ##########################################################
        # Choose which (model, layer) embeddings to keep.
        y_a_i = np.repeat(range(n_way), n_support)
        embeddings_idx_of_each, embeddings_idx_model, embeddings_train, embeddings_best_of_each = train_selection(
            all_embeddings_train,
            y_a_i,
            support_size,
            n_support,
            n_way,
            with_replacement=True)
        ##########################################################
        # Collect the same per-layer embeddings for the query set.
        all_embeddings_test = []
        for idx, model_name in enumerate(pretrained_models):
            model_embeddings = []
            x_b_i = x_var[:, n_support:, :, :, :].contiguous().view(
                n_way * n_query, *x.size()[2:])
            for idx, module in enumerate(pretrained_models[idx].trunk):
                x_b_i = module(x_b_i)
                if len(list(x_b_i.size())) == 4:
                    embedding = F.adaptive_avg_pool2d(x_b_i,
                                                      (1, 1)).squeeze()
                    model_embeddings.append(embedding.detach())
            if params.model == "ResNet10" or params.model == "ResNet18":
                model_embeddings = model_embeddings[4:-1]
            elif params.model == "Conv4":
                model_embeddings = model_embeddings
            all_embeddings_test.append(model_embeddings)
        ############################################################################################
        # Concatenate the selected query embeddings feature-wise.
        embeddings_test = []
        for index in embeddings_idx_model:
            embeddings_test.append(
                all_embeddings_test[index][embeddings_idx_of_each[index]])
        embeddings_test = torch.cat(embeddings_test, 1)
        ############################################################################################
        # Train a linear classifier on the selected support embeddings.
        y_a_i = Variable(torch.from_numpy(np.repeat(
            range(n_way), n_support))).cuda()  # (25,)
        net = Classifier(embeddings_test.size()[1], n_way).cuda()
        loss_fn = nn.CrossEntropyLoss().cuda()
        classifier_opt = torch.optim.SGD(net.parameters(),
                                         lr=0.01,
                                         momentum=0.9,
                                         dampening=0.9,
                                         weight_decay=0.001)
        total_epoch = 100
        embeddings_train = Variable(embeddings_train.cuda())
        net.train()
        for epoch in range(total_epoch):
            rand_id = np.random.permutation(support_size)
            for j in range(0, support_size, batch_size):
                classifier_opt.zero_grad()
                #####################################
                selected_id = torch.from_numpy(
                    rand_id[j:min(j + batch_size, support_size)]).cuda()
                z_batch = embeddings_train[selected_id]
                y_batch = y_a_i[selected_id]
                #####################################
                outputs = net(z_batch)
                #####################################
                loss = loss_fn(outputs, y_batch)
                loss.backward()
                classifier_opt.step()
        # Score the query embeddings with the trained classifier.
        embeddings_test = Variable(embeddings_test.cuda())
        scores = net(embeddings_test)
        y_query = np.repeat(range(n_way), n_query)
        topk_scores, topk_labels = scores.data.topk(1, 1, True, True)
        topk_ind = topk_labels.cpu().numpy()
        top1_correct = np.sum(topk_ind[:, 0] == y_query)
        correct_this, count_this = float(top1_correct), len(y_query)
        print(correct_this / count_this * 100)
        acc_all.append((correct_this / count_this * 100))
    ###############################################################################################
    # Aggregate accuracy with a 95% confidence interval.
    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    print('%d Test Acc = %4.2f%% +- %4.2f%%' %
          (iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num)))
iter_num = 600 few_shot_params = dict(n_way=params.test_n_way, n_support=params.n_shot) image_size = params.image_size acc_all = [] if params.loadfile != '': modelfile = params.loadfile checkpoint_dir = params.loadfile else: checkpoint_dir = params.checkpoint_dir # checkpoint path if params.save_iter != -1: modelfile = get_assigned_file(checkpoint_dir, params.save_iter) elif params.method in ['baseline', 'baseline++']: modelfile = get_resume_file(checkpoint_dir) else: modelfile = get_best_file( checkpoint_dir) # return the best.tar file if params.method in ['maml', 'maml_approx']: if modelfile is not None: tmp = torch.load(modelfile) state = tmp['state'] state_keys = list(state.keys()) for i, key in enumerate(state_keys): if "feature." in key: newkey = key.replace( "feature.", "" ) # an architecture model has attribute 'feature', load architecture feature to backbone by casting name from 'feature.trunk.xx' to 'trunk.xx' state[newkey] = state.pop(key) else: state.pop(key) model.feature.load_state_dict(tmp['state'])
def run_save(params):
    """Extract and save backbone features for one dataset split to .hdf5.

    Resolves the split file and checkpoint, builds the backbone for
    ``params.method`` (maml is skipped — it cannot save features), loads
    backbone weights (stripping the 'feature.' prefix), and writes features
    via ``save_features``.
    """
    print('Run Save features ... ')
    if 'maml' in params.method:
        print('Continuing since maml doesnt support save_feature')
        return
    image_size = get_image_size(params)
    if params.dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug, 'omniglot only support Conv4 without augmentation'
        # Omniglot uses the 1-channel Conv4 variant; mutates params in place.
        params.model = 'Conv4S'
    # Resolve the split .json listing for the requested dataset.
    split = params.split
    if params.dataset == 'cross':
        if split == 'base':
            loadfile = configs.data_dir['miniImagenet'] + 'all.json'
        else:
            loadfile = configs.data_dir['CUB'] + split + '.json'
    elif params.dataset == 'cross_char':
        if split == 'base':
            loadfile = configs.data_dir['omniglot'] + 'noLatin.json'
        else:
            loadfile = configs.data_dir['emnist'] + split + '.json'
    else:
        loadfile = configs.data_dir[params.dataset] + split + '.json'
    # An explicitly supplied checkpoint_dir is used as-is; otherwise build it
    # from the run configuration.
    if hasattr(params, 'checkpoint_dir'):
        checkpoint_dir = params.checkpoint_dir
    else:
        checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
            configs.save_dir, params.dataset, params.model, params.method)
        if params.train_aug:
            checkpoint_dir += '_aug'
        if not params.method in ['baseline', 'baseline++']:
            checkpoint_dir += '_%dway_%dshot' % (params.train_n_way,
                                                 params.n_shot)
        # NOTE(review): the _aug/_way suffixes are treated as part of the
        # constructed path only — confirm an explicit checkpoint_dir is
        # always passed fully formed.
    print(f'Checkpoint dir: {checkpoint_dir}')
    if params.save_iter != -1:
        modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
    # elif params.method in ['baseline', 'baseline++'] :
    #     modelfile = get_resume_file(checkpoint_dir) #comment in 2019/08/03 updates as the validation of baseline/baseline++ is added
    else:
        modelfile = get_best_file(checkpoint_dir)
    print(f'Model file {modelfile}')
    # Feature file path mirrors the checkpoint path under 'features/'.
    if params.save_iter != -1:
        outfile = os.path.join(
            checkpoint_dir.replace("checkpoints", "features"),
            split + "_" + str(params.save_iter) + ".hdf5")
    else:
        outfile = os.path.join(
            checkpoint_dir.replace("checkpoints", "features"),
            split + ".hdf5")
    datamgr = SimpleDataManager(image_size, batch_size=64)
    data_loader = datamgr.get_data_loader(loadfile, aug=False)
    # Build the backbone matching the method.
    if params.method in ['relationnet', 'relationnet_softmax']:
        # RelationNet needs non-flattened (spatial) feature maps.
        if params.model == 'Conv4':
            model = backbone.Conv4NP()
        elif params.model == 'Conv6':
            model = backbone.Conv6NP()
        elif params.model == 'Conv4S':
            model = backbone.Conv4SNP()
        else:
            model = model_dict[params.model](flatten=False)
    elif params.method in ['maml', 'maml_approx']:
        raise ValueError('MAML do not support save feature')
    else:
        model = model_dict[params.model]()
    model = model.cuda()
    # print('Model keys')
    # print(model.state_dict().keys())
    tmp = torch.load(modelfile)
    state = tmp['state']
    # print('Loaded keys')
    # print(state.keys())
    state_keys = list(state.keys())
    for i, key in enumerate(state_keys):
        if "feature." in key:
            newkey = key.replace(
                "feature.", ""
            )  # an architecture model has attribute 'feature', load architecture feature to backbone by casting name from 'feature.trunk.xx' to 'trunk.xx'
            state[newkey] = state.pop(key)
        else:
            state.pop(key)
    model.load_state_dict(state)
    model.eval()
    # Make sure the output directory for the feature file exists.
    dirname = os.path.dirname(outfile)
    if not os.path.isdir(dirname):
        os.makedirs(dirname)
    save_features(model, data_loader, outfile)
def get_logits_targets(params):
    """Run `iter_num` evaluation episodes for ``params.method`` and collect
    the raw predictions.

    Returns:
        (logits, targets): two stacked cuda tensors with one row per query
        sample, concatenated over all episodes.

    Raises:
        ValueError: if ``params.method`` is not a known method.
    """
    iter_num = 600
    few_shot_params = dict(n_way=params.test_n_way, n_support=params.n_shot)
    if params.dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug, 'omniglot only support Conv4 without augmentation'
        params.model = 'Conv4S'
    # Build the few-shot model for the requested method.
    if params.method == 'baseline':
        model = BaselineFinetune(model_dict[params.model], **few_shot_params)
    elif params.method == 'baseline++':
        model = BaselineFinetune(model_dict[params.model], loss_type='dist',
                                 **few_shot_params)
    elif params.method == 'protonet':
        model = ProtoNet(model_dict[params.model], **few_shot_params)
    elif params.method == 'DKT':
        model = DKT(model_dict[params.model], **few_shot_params)
    elif params.method == 'matchingnet':
        model = MatchingNet(model_dict[params.model], **few_shot_params)
    elif params.method in ['relationnet', 'relationnet_softmax']:
        # Relation networks consume unflattened feature maps.
        if params.model == 'Conv4':
            feature_model = backbone.Conv4NP
        elif params.model == 'Conv6':
            feature_model = backbone.Conv6NP
        elif params.model == 'Conv4S':
            feature_model = backbone.Conv4SNP
        else:
            feature_model = lambda: model_dict[params.model](flatten=False)
        loss_type = 'mse' if params.method == 'relationnet' else 'softmax'
        model = RelationNet(feature_model, loss_type=loss_type,
                            **few_shot_params)
    elif params.method in ['maml', 'maml_approx']:
        backbone.ConvBlock.maml = True
        backbone.SimpleBlock.maml = True
        backbone.BottleneckBlock.maml = True
        backbone.ResNet.maml = True
        model = MAML(model_dict[params.model],
                     approx=(params.method == 'maml_approx'),
                     **few_shot_params)
        if params.dataset in ['omniglot', 'cross_char']:
            # maml uses different hyper-parameters on omniglot
            model.n_task = 32
            model.task_update_num = 1
            model.train_lr = 0.1
    else:
        raise ValueError('Unknown method')
    model = model.cuda()
    checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
        configs.save_dir, params.dataset, params.model, params.method)
    if params.train_aug:
        checkpoint_dir += '_aug'
    if params.method not in ['baseline', 'baseline++']:
        checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, params.n_shot)
    if params.method not in ['baseline', 'baseline++']:
        if params.save_iter != -1:
            modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
        else:
            modelfile = get_best_file(checkpoint_dir)
        if modelfile is not None:
            tmp = torch.load(modelfile)
            model.load_state_dict(tmp['state'])
        else:
            print("[WARNING] Cannot find 'best_file.tar' in: " + str(checkpoint_dir))
    split = params.split
    if params.save_iter != -1:
        split_str = split + "_" + str(params.save_iter)
    else:
        split_str = split
    if params.method in ['maml', 'maml_approx', 'DKT']:
        # These methods cannot test on precomputed features; run on images.
        if 'Conv' in params.model:
            if params.dataset in ['omniglot', 'cross_char']:
                image_size = 28
            else:
                image_size = 84
        else:
            image_size = 224
        datamgr = SetDataManager(image_size, n_eposide=iter_num, n_query=15,
                                 **few_shot_params)
        if params.dataset == 'cross':
            if split == 'base':
                loadfile = configs.data_dir['miniImagenet'] + 'all.json'
            else:
                loadfile = configs.data_dir['CUB'] + split + '.json'
        elif params.dataset == 'cross_char':
            if split == 'base':
                loadfile = configs.data_dir['omniglot'] + 'noLatin.json'
            else:
                loadfile = configs.data_dir['emnist'] + split + '.json'
        else:
            loadfile = configs.data_dir[params.dataset] + split + '.json'
        novel_loader = datamgr.get_data_loader(loadfile, aug=False)
        if params.adaptation:
            # Adaptation for MAML is simply more inner-loop updates.
            model.task_update_num = 100
        model.eval()
        logits_list = list()
        targets_list = list()
        for i, (x, _) in enumerate(novel_loader):
            # NOTE(review): x is not moved to cuda here — presumably
            # model.get_logits handles device placement; confirm.
            logits = model.get_logits(x).detach()
            targets = torch.tensor(
                np.repeat(range(params.test_n_way), model.n_query)).cuda()
            logits_list.append(logits)
            targets_list.append(targets)
    else:
        # Evaluate on features precomputed by save_features.
        novel_file = os.path.join(
            checkpoint_dir.replace("checkpoints", "features"),
            split_str + ".hdf5")
        cl_data_file = feat_loader.init_loader(novel_file)
        logits_list = list()
        targets_list = list()
        n_query = 15
        n_way = few_shot_params['n_way']
        n_support = few_shot_params['n_support']
        # random.sample requires a sequence: passing the dict view directly
        # raises TypeError on Python >= 3.11 (deprecated since 3.9).
        class_list = list(cl_data_file.keys())
        for _ in range(iter_num):
            # Sample an episode: n_way classes, n_support + n_query features each.
            select_class = random.sample(class_list, n_way)
            z_all = []
            for cl in select_class:
                img_feat = cl_data_file[cl]
                perm_ids = np.random.permutation(len(img_feat)).tolist()
                z_all.append([
                    np.squeeze(img_feat[perm_ids[j]])
                    for j in range(n_support + n_query)
                ])  # stack each batch
            z_all = torch.from_numpy(np.array(z_all))
            model.n_query = n_query
            logits = model.set_forward(z_all, is_feature=True).detach()
            targets = torch.tensor(np.repeat(range(n_way), n_query)).cuda()
            logits_list.append(logits)
            targets_list.append(targets)
    return torch.cat(logits_list, 0), torch.cat(targets_list, 0)
def meta_test(novel_loader, n_query=15, pretrained_dataset='miniImageNet', freeze_backbone=False, n_way=5, n_support=5):
    """For each task in `novel_loader`, reload the pretrained backbone, train a
    fresh linear classifier on the support set (optionally fine-tuning the
    backbone), then report top-1 accuracy on the query set.

    Args:
        novel_loader: loader of evaluation tasks; each batch x has shape
            (n_way, n_support + n_query, C, H, W).
        n_query: queries per class (recomputed below from x anyway).
        pretrained_dataset: dataset name used to locate the checkpoint dir.
        freeze_backbone: if False, the backbone is fine-tuned per task.
        n_way, n_support: episode configuration.

    Uses the module-level `params` namespace for model/method/checkpoint info.
    Prints per-task and final mean accuracy (95% confidence interval).
    """
    correct = 0
    count = 0
    iter_num = len(novel_loader)  # number of evaluation tasks (e.g. 600)
    acc_all = []
    for ti, (x, y) in enumerate(novel_loader):
        # Reload the pretrained weights from disk for every task so each task
        # starts from the same initial backbone (fine-tuning mutates it).
        pretrained_model = model_dict[params.model]()
        checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
            configs.save_dir, pretrained_dataset, params.model, params.method)
        if params.train_aug:
            checkpoint_dir += '_aug'
        # NOTE(review): save_iter is forced to -1 here, so the
        # get_assigned_file branch below can never be taken.
        params.save_iter = -1
        if params.save_iter != -1:
            modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
        elif params.method in ['baseline', 'baseline++']:
            modelfile = get_resume_file(checkpoint_dir)
        else:
            modelfile = get_best_file(checkpoint_dir)
        print(
            "load from %s" % (modelfile)
        )  # e.g. "./logs/checkpoints/miniImagenet/ResNet10_baseline_aug/399.pth"
        tmp = torch.load(modelfile)
        state = tmp['state']
        state_keys = list(state.keys())
        # Keep only backbone weights: 'feature.trunk.xx' -> 'trunk.xx';
        # everything else (the old classifier) is discarded.
        for _, key in enumerate(state_keys):
            if "feature." in key:
                newkey = key.replace("feature.", "")
                state[newkey] = state.pop(key)  # replace key name
            else:
                state.pop(key)  # remove classifier
        pretrained_model.load_state_dict(state)  # load checkpoints
        # Fresh linear classifier per task, shape (final_feat_dim, n_way).
        classifier = Classifier(pretrained_model.final_feat_dim, n_way)
        # Split the task into support (first n_support per class) and query.
        n_query = x.size(1) - n_support
        x = x.cuda()
        x_var = Variable(x)  # (n_way, n_support + n_query, C, H, W)
        batch_size = 4
        support_size = n_way * n_support  # e.g. 25
        # Support labels: [0]*n_support + [1]*n_support + ...
        y_a_i = Variable(torch.from_numpy(np.repeat(range(n_way),
                                                    n_support))).cuda()
        # Query set flattened to (n_way * n_query, C, H, W).
        x_b_i = x_var[:, n_support:, :, :, :].contiguous().view(
            n_way * n_query, *x.size()[2:])  # query set
        # Support set flattened to (n_way * n_support, C, H, W).
        x_a_i = x_var[:, :n_support, :, :, :].contiguous().view(
            n_way * n_support, *x.size()[2:])  # support set
        # Loss and optimizers: the classifier is always trained; the backbone
        # gets its own optimizer only when fine-tuning is enabled.
        loss_fn = nn.CrossEntropyLoss().cuda()
        classifier_opt = torch.optim.SGD(classifier.parameters(),
                                         lr=0.01,
                                         momentum=0.9,
                                         dampening=0.9,
                                         weight_decay=0.001)
        if freeze_backbone is False:  # for finetune use
            delta_opt = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                               pretrained_model.parameters()),
                                        lr=0.01)
        pretrained_model.cuda()  # pretrained on "mini-ImageNet", not "ImageNet"
        classifier.cuda()
        # Fine-tune on the support set for 100 epochs with batch size 4
        # (standard Baseline / Baseline++ meta-testing protocol).
        total_epoch = 100
        if freeze_backbone is False:  # for finetune use
            pretrained_model.train()
        else:
            pretrained_model.eval()
        classifier.train()  # classifier is task-dependent, always trained
        for epoch in range(total_epoch):
            rand_id = np.random.permutation(support_size)
            for j in range(0, support_size, batch_size):
                classifier_opt.zero_grad()
                if freeze_backbone is False:
                    delta_opt.zero_grad()
                # Random mini-batch of up to batch_size support samples.
                selected_id = torch.from_numpy(
                    rand_id[j:min(j + batch_size, support_size)]).cuda()
                z_batch = x_a_i[selected_id]
                y_batch = y_a_i[selected_id]
                output = pretrained_model(z_batch)  # features
                output = classifier(output)  # predictions
                loss = loss_fn(output, y_batch)
                loss.backward()
                classifier_opt.step()
                if freeze_backbone is False:
                    delta_opt.step()
        # Inference on the query set.
        pretrained_model.eval()
        classifier.eval()
        output = pretrained_model(x_b_i.cuda())  # features
        scores = classifier(output)  # predictions
        y_query = np.repeat(range(n_way), n_query)  # ground truth, shape (n_way*n_query,)
        # topk(1, dim=1, largest=True, sorted=True): predicted class per query.
        topk_scores, topk_labels = scores.data.topk(1, 1, True, True)
        topk_ind = topk_labels.cpu().numpy()
        top1_correct = np.sum(topk_ind[:, 0] == y_query)
        correct_this, count_this = float(top1_correct), len(y_query)
        acc_all.append((correct_this / count_this * 100))
        print("Task %d : %4.2f%% Now avg: %4.2f%%" %
              (ti, correct_this / count_this * 100, np.mean(acc_all)))
    # Aggregate accuracy with a 95% confidence interval over all tasks.
    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    print('%d Test Acc = %4.2f%% +- %4.2f%%' %
          (iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num)))
else: checkpoint_dir_dct = '%s/checkpoints/%s/%sdct_%s_%sway_%sshot' %(configs.save_dir, params.dataset, params.model,params.method, params.test_n_way, params.n_shot) if params.train_aug: checkpoint_dir_plain +='_aug' checkpoint_dir_dct += '_aug' checkpoint_dir_dct += '_dct' print("chechpoint_dir_plain: ", checkpoint_dir_plain) print("checkpoint_dir_dct:", checkpoint_dir_dct) if params.save_iter != -1: modelfile_plain = get_assigned_file(checkpoint_dir_plain,params.save_iter) modelfile_dct = get_assigned_file(checkpoint_dir_dct,params.save_iter) else: modelfile_plain = get_best_file(checkpoint_dir_plain) modelfile_dct = get_best_file(checkpoint_dir_dct) if params.save_iter != -1: outfile = os.path.join( checkpoint_dir_plain.replace("checkpoints","features"), split + "_" + str(params.save_iter)+ "_both.hdf5") else: outfile = os.path.join( checkpoint_dir_plain.replace("checkpoints","features"), split + "_both.hdf5") datamgr = SimpleDataManager_both(image_size, batch_size = 1) print("out file: ", outfile) data_loader_dct = datamgr.get_data_loader_dct(loadfile, aug = False) data_loader_plain = datamgr.get_data_loader(loadfile, aug = False) if params.method == 'manifold_mixup':
model = to_cuda(model) checkpoint_dir = '%s/checkpoints/%s/%s_%s' % ( configs.save_dir, params.dataset, params.model, params.method) if params.train_aug: checkpoint_dir += '_aug' if not params.method in ['baseline', 'baseline++']: checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, params.n_shot) #modelfile = get_resume_file(checkpoint_dir) if not params.method in ['baseline', 'baseline++']: if params.save_iter != -1: modelfile = get_assigned_file(checkpoint_dir, params.save_iter) else: modelfile = get_best_file(checkpoint_dir) if modelfile is not None: tmp = torch.load(modelfile) model.load_state_dict(tmp['state']) split = params.split if params.save_iter != -1: split_str = split + "_" + str(params.save_iter) else: split_str = split if params.method in ['maml', 'maml_approx' ]: #maml do not support testing with feature if 'Conv' in params.model: if params.dataset in ['omniglot', 'cross_char']: image_size = 28 else:
def finetune(novel_loader, n_query=15, freeze_backbone=False, n_way=5, n_support=5, loadpath='', adaptation=False, pretrained_dataset='miniImagenet', proto_init=False):
    """Evaluate per-task adaptation: for each task, reload the pretrained
    backbone and classify queries either with a fine-tuned linear classifier
    (``adaptation=True``) or with a prototype-based classifier.

    Args:
        novel_loader: loader of evaluation tasks; each batch x has shape
            (n_way, n_support + n_query, C, H, W).
        n_query: queries per class (recomputed below from x).
        freeze_backbone: if False (and adaptation), the backbone is fine-tuned.
        n_way, n_support: episode configuration.
        loadpath: unused here — checkpoint path is derived from `params`.
        adaptation: train a linear classifier per task instead of prototypes.
        pretrained_dataset: unused here — dataset comes from `params.dataset`.
        proto_init: initialise the linear classifier from class prototypes.

    Uses the module-level `params` namespace. Prints mean accuracy with a 95%
    confidence interval.
    """
    correct = 0
    count = 0
    iter_num = len(novel_loader)  # number of evaluation tasks
    acc_all = []
    with tqdm(enumerate(novel_loader), total=len(novel_loader)) as pbar:
        for _, (x, y) in pbar:  #, position=1,
            #leave=False):
            # Reload the pretrained backbone for every task so each task
            # starts from identical weights (fine-tuning mutates them).
            pretrained_model = model_dict[params.model]()
            checkpoint_dir = '%s/checkpoints/%s/%s_%s_%s%s_%s%s' % (
                configs.save_dir, params.dataset, params.model, params.method,
                params.n_support, "s" if params.no_aug_support else "s_aug",
                params.n_query, "q" if params.no_aug_query else "q_aug")
            checkpoint_dir += "_bs{}".format(params.batch_size)
            if params.save_iter != -1:
                modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
            elif params.method in ['baseline', 'baseline++']:
                modelfile = get_resume_file(checkpoint_dir)
            else:
                modelfile = get_best_file(checkpoint_dir)
            tmp = torch.load(modelfile)
            state = tmp['state']
            state_keys = list(state.keys())
            # Keep only backbone weights: 'feature.trunk.xx' -> 'trunk.xx';
            # drop everything else (e.g. old classifier head).
            for _, key in enumerate(state_keys):
                if "feature." in key:
                    newkey = key.replace(
                        "feature.", ""
                    )  # an architecture model has attribute 'feature'; cast names from 'feature.trunk.xx' to 'trunk.xx'
                    state[newkey] = state.pop(key)
                else:
                    state.pop(key)
            pretrained_model.load_state_dict(state)
            pretrained_model.cuda()
            pretrained_model.train()
            # Classifier choice: trainable linear head vs. parameter-free
            # prototype classifier.
            if adaptation:
                classifier = Classifier(pretrained_model.final_feat_dim, n_way)
                classifier.cuda()
                classifier.train()
            else:
                classifier = ProtoClassifier(n_way, n_support, n_query)
            # Split the task into support / query sets.
            n_query = x.size(1) - n_support
            x = x.cuda()
            x_var = Variable(x)
            batch_size = n_way  # NOTE: mini-batch size equals n_way here (not 4)
            support_size = n_way * n_support
            # Support labels: [0]*n_support + [1]*n_support + ...
            y_a_i = Variable(
                torch.from_numpy(np.repeat(range(n_way),
                                           n_support))).cuda()  # (25,)
            # Query set flattened to (n_way * n_query, C, H, W).
            x_b_i = x_var[:, n_support:, :, :, :].contiguous().view(
                n_way * n_query, *x.size()[2:])
            # Support set flattened to (n_way * n_support, C, H, W).
            x_a_i = x_var[:, :n_support, :, :, :].contiguous().view(
                n_way * n_support, *x.size()[2:])  # (25, 3, 224, 224)
            # Support embeddings computed once in eval mode (used for
            # prototype init and for the ProtoClassifier at inference).
            pretrained_model.eval()
            z_a_i = pretrained_model(x_a_i.cuda())
            pretrained_model.train()
            loss_fn = nn.CrossEntropyLoss().cuda()
            if adaptation:
                inner_lr = params.lr_rate
                if proto_init:  # Initialise as distance classifier (distance to prototypes)
                    classifier.init_params_from_prototypes(
                        z_a_i, n_way, n_support)
                classifier_opt = torch.optim.Adam(classifier.parameters(),
                                                  lr=inner_lr)
                if freeze_backbone is False:
                    delta_opt = torch.optim.Adam(filter(
                        lambda p: p.requires_grad,
                        pretrained_model.parameters()),
                                                 lr=inner_lr)
                total_epoch = params.ft_steps
                if freeze_backbone is False:
                    pretrained_model.train()
                else:
                    pretrained_model.eval()
                classifier.train()
                # Fine-tune the classifier (and optionally the backbone) on
                # random support mini-batches.
                for epoch in tqdm(range(total_epoch),
                                  total=total_epoch,
                                  leave=False):
                    rand_id = np.random.permutation(support_size)
                    for j in range(0, support_size, batch_size):
                        classifier_opt.zero_grad()
                        if freeze_backbone is False:
                            delta_opt.zero_grad()
                        selected_id = torch.from_numpy(
                            rand_id[j:min(j + batch_size,
                                          support_size)]).cuda()
                        z_batch = x_a_i[selected_id]
                        y_batch = y_a_i[selected_id]
                        output = pretrained_model(z_batch)
                        output = classifier(output)
                        loss = loss_fn(output, y_batch)
                        loss.backward()
                        classifier_opt.step()
                        if freeze_backbone is False:
                            delta_opt.step()
            # Inference on the query set.
            classifier.eval()
            pretrained_model.eval()
            output = pretrained_model(x_b_i.cuda())
            if adaptation:
                scores = classifier(output)
            else:
                # Prototype classifier scores queries against support
                # embeddings computed above.
                scores = classifier(z_a_i, y_a_i, output)
            y_query = np.repeat(range(n_way), n_query)
            # topk(1, dim=1, largest=True, sorted=True): predicted class per query.
            topk_scores, topk_labels = scores.data.topk(1, 1, True, True)
            topk_ind = topk_labels.cpu().numpy()
            top1_correct = np.sum(topk_ind[:, 0] == y_query)
            correct_this, count_this = float(top1_correct), len(y_query)
            acc_all.append((correct_this / count_this * 100))
            pbar.set_postfix(avg_acc=np.mean(np.asarray(acc_all)))
    # Aggregate accuracy with a 95% confidence interval over all tasks.
    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    print('%d Test Acc = %4.2f%% +- %4.2f%%' %
          (iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num)))
def meta_test(novel_loader, n_query=15, pretrained_dataset='miniImageNet', freeze_backbone=False, n_pseudo=100, n_way=5, n_support=5):
    """Evaluate a meta-trained model (ProtoNet / TPN variant) on novel tasks,
    optionally fine-tuning it per task with pseudo query images generated
    from the support set.

    Args:
        novel_loader: loader of evaluation tasks; each batch x has shape
            (n_way, n_support + n_query, C, H, W).
        n_query: queries per class (recomputed below from x).
        pretrained_dataset: dataset name used to locate the checkpoint dir.
        freeze_backbone: if False, fine-tune the whole model on pseudo tasks.
        n_pseudo: total number of pseudo query images for fine-tuning.
        n_way, n_support: episode configuration.

    Uses the module-level `params` namespace. Prints per-task and final mean
    accuracy (95% confidence interval).

    Raises:
        ValueError: if ``params.method`` is not a supported method.
    """
    iter_num = len(novel_loader)  # number of evaluation tasks (e.g. 600)
    acc_all = []
    for ti, (x, y) in enumerate(novel_loader):
        # Build and reload the meta-trained model for every task so each task
        # starts from identical weights (fine-tuning mutates them).
        if params.method == 'protonet':
            pretrained_model = ProtoNet(model_dict[params.model],
                                        n_way=n_way,
                                        n_support=n_support)
        elif 'mytpn' in params.method:
            pretrained_model = MyTPN(model_dict[params.model],
                                     n_way=n_way,
                                     n_support=n_support)
        else:
            # Previously fell through and crashed later with NameError.
            raise ValueError('Unknown method')
        checkpoint_dir = '%s/checkpoints/%s/%s_%s' % (
            configs.save_dir, pretrained_dataset, params.model, params.method)
        if params.train_aug:
            checkpoint_dir += '_aug'
        checkpoint_dir += '_5way_5shot'
        # NOTE(review): save_iter is forced to -1, so the assigned-file
        # branch below is dead and the best checkpoint is always loaded.
        params.save_iter = -1
        if params.save_iter != -1:
            modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
        else:
            modelfile = get_best_file(checkpoint_dir)
        print(
            "load from %s" % (modelfile)
        )  # e.g. logs/checkpoints/miniImageNet/ResNet10_protonet_aug_5way_5shot/best_model.tar
        tmp = torch.load(modelfile)
        state = tmp['state']
        pretrained_model.load_state_dict(state)  # load checkpoints to model
        pretrained_model.cuda()
        # Split the task into support and query sets.
        n_query = x.size(1) - n_support  # e.g. 20 - 5 = 15
        x = x.cuda()  # (n_way, n_support + n_query, C, H, W)
        x_var = Variable(x)
        support_size = n_way * n_support  # e.g. 25
        y_a_i = Variable(torch.from_numpy(np.repeat(
            range(n_way), n_support))).cuda()  # support labels, (25,)
        x_b_i = x_var[:, n_support:, :, :, :].contiguous().view(
            n_way * n_query, *x.size()[2:])  # query set (75, C, H, W)
        x_a_i = x_var[:, :n_support, :, :, :].contiguous().view(
            n_way * n_support, *x.size()[2:])  # support set (25, C, H, W)
        if not freeze_backbone:
            # Fine-tune on episodes built from the support set plus pseudo
            # query images generated from it.
            pseudo_query_generator = PseudoQeuryGenerator(
                n_way, n_support, n_pseudo)
            delta_opt = torch.optim.Adam(
                filter(lambda p: p.requires_grad,
                       pretrained_model.parameters()))
            finetune_epoch = 100
            fine_tune_n_query = n_pseudo // n_way  # e.g. 100 // 5 = 20
            pretrained_model.n_query = fine_tune_n_query
            pretrained_model.train()
            z_support = x_a_i.view(n_way, n_support,
                                   *x_a_i.size()[1:])  # (n_way, n_support, C, H, W)
            for epoch in range(finetune_epoch):
                delta_opt.zero_grad()
                # Generate pseudo query images from the support images.
                pseudo_query_set, _ = pseudo_query_generator.generate(x_a_i)
                pseudo_query_set = pseudo_query_set.cuda().view(
                    n_way, fine_tune_n_query, *x_a_i.size()[1:])
                x = torch.cat((z_support, pseudo_query_set), dim=1)
                loss = pretrained_model.set_forward_loss(x)
                loss.backward()
                delta_opt.step()
        # Inference on the real query set.
        pretrained_model.eval()
        pretrained_model.n_query = n_query
        with torch.no_grad():
            scores = pretrained_model.set_forward(
                x_var.cuda())  # set_forward in protonet.py
        y_query = np.repeat(range(n_way), n_query)  # ground truth, (n_way*n_query,)
        # topk(1, dim=1, largest=True, sorted=True): predicted class per query.
        topk_scores, topk_labels = scores.data.topk(1, 1, True, True)
        topk_ind = topk_labels.cpu().numpy()
        top1_correct = np.sum(topk_ind[:, 0] == y_query)
        correct_this, count_this = float(top1_correct), len(y_query)
        acc_all.append((correct_this / count_this * 100))
        print("Task %d : %4.2f%% Now avg: %4.2f%%" %
              (ti, correct_this / count_this * 100, np.mean(acc_all)))
    # Aggregate accuracy with a 95% confidence interval over all tasks.
    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    print('%d Test Acc = %4.2f%% +- %4.2f%%' %
          (iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num)))
if params.train_n_way != -1: checkpoint_dir += '_%d-way_' %( params.train_n_way ) else: checkpoint_dir += '_random-way_' if params.train_n_shot != -1: checkpoint_dir += '%d-shot' % ( params.train_n_shot ) else: checkpoint_dir += 'random-shot' #modelfile = get_resume_file(checkpoint_dir) print(checkpoint_dir) if params.save_iter != -1: modelfile = get_assigned_file(checkpoint_dir,params.save_iter) else: modelfile = get_best_file(checkpoint_dir, params.test_n_way) if modelfile is not None: tmp = torch.load(modelfile) model.load_state_dict(tmp['state']) split = params.split if params.save_iter != -1: split_str = split + "_" +str(params.save_iter) else: split_str = split novel_file = os.path.join( checkpoint_dir.replace("checkpoints","features"), split_str + ".hdf5") print('feature file: '+ novel_file) cl_data_file = feat_loader.init_loader(novel_file)
elif params.method in ["dampnet_full_class"]: model = dampnet_full_class.DampNet(model_dict[params.model], **few_shot_params) elif params.method == "baseline": checkpoint_dir_b = '%s/checkpoints/%s/%s_%s' % ( configs.save_dir, pretrained_dataset, params.model, "baseline") if params.train_aug: checkpoint_dir_b += '_aug' if params.save_iter != -1: modelfile_b = get_assigned_file(checkpoint_dir_b, 400) elif params.method in ['baseline', 'baseline++']: modelfile_b = get_resume_file(checkpoint_dir_b) else: modelfile_b = get_best_file(checkpoint_dir_b) tmp_b = torch.load(modelfile_b) state_b = tmp_b['state'] elif params.method == "all": #model = ProtoNet( model_dict[params.model], **few_shot_params ) checkpoint_dir = '%s/checkpoints/%s/%s_%s' % ( configs.save_dir, 'miniImageNet', params.model, "protonet") model_2 = GnnNet(model_dict[params.model], **few_shot_params) checkpoint_dir2 = '%s/checkpoints/%s/%s_%s' % ( configs.save_dir, 'miniImageNet', params.model, "gnnnet") #model_3 = dampnet_full_class.DampNet( model_dict[params.model], **few_shot_params ) checkpoint_dir3 = '%s/checkpoints/%s/%s_%s' % ( configs.save_dir, 'miniImageNet', params.model, "dampnet_full_class")
def __init__(self, params):
    """Set up data loaders, model, logging, and (optionally) resume state
    for a Baseline training/testing run.

    Args:
        params: parsed command-line namespace (train_dataset, test_dataset,
            model, mode, resume, episodic, vis_log, tag, checkpoint_dir, ...).
    """
    np.random.seed(10)  # NOTE(review): fixed seed hard-coded here
    # Resolve base / validation split files for the training dataset.
    if params.train_dataset == 'cross':
        base_file = configs.data_dir['miniImagenet'] + 'all.json'
        val_file = configs.data_dir['CUB'] + 'val.json'
    elif params.train_dataset == 'cross_char':
        base_file = configs.data_dir['omniglot'] + 'noLatin.json'
        val_file = configs.data_dir['emnist'] + 'val.json'
    else:
        base_file = configs.data_dir[params.train_dataset] + 'base.json'
        val_file = configs.data_dir[params.train_dataset] + 'val.json'
    # Image size follows the backbone family.
    if 'Conv' in params.model:
        if params.train_dataset in ['omniglot', 'cross_char']:
            image_size = 28
        else:
            image_size = 84
    else:
        image_size = 224
    if params.train_dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug, 'omniglot only support Conv4 without augmentation'
        params.model = 'Conv4S'
    if params.train_dataset == 'omniglot':
        assert params.num_classes >= 4112, 'class number need to be larger than max label id in base class'
    if params.train_dataset == 'cross_char':
        assert params.num_classes >= 1597, 'class number need to be larger than max label id in base class'
    # Keep the train query count proportional to the test configuration.
    params.train_num_query = max(
        1,
        int(params.test_num_query * params.test_num_way /
            params.train_num_way))
    # Episodic training uses set-based sampling; otherwise plain batches.
    if params.episodic:
        train_few_shot_params = dict(n_way=params.train_num_way,
                                     n_support=params.train_num_shot,
                                     n_query=params.train_num_query)
        base_datamgr = SetDataManager(image_size, **train_few_shot_params)
        base_loader = base_datamgr.get_data_loader(base_file,
                                                   aug=params.train_aug)
    else:
        base_datamgr = SimpleDataManager(image_size, batch_size=32)
        base_loader = base_datamgr.get_data_loader(base_file,
                                                   aug=params.train_aug)
    # Resolve the novel split file for the test dataset.
    if params.test_dataset == 'cross':
        novel_file = configs.data_dir['CUB'] + 'novel.json'
    elif params.test_dataset == 'cross_char':
        novel_file = configs.data_dir['emnist'] + 'novel.json'
    else:
        novel_file = configs.data_dir[params.test_dataset] + 'novel.json'
    # NOTE(review): the validation loader is built from novel_file, not
    # val_file (val_file is only stored on self) — confirm this is intended.
    val_datamgr = SimpleDataManager(image_size, batch_size=64)
    val_loader = val_datamgr.get_data_loader(novel_file, aug=False)
    novel_datamgr = SimpleDataManager(image_size, batch_size=64)
    novel_loader = novel_datamgr.get_data_loader(novel_file, aug=False)
    optimizer = params.optimizer
    # Default epoch budgets per dataset when not given explicitly.
    if params.stop_epoch == -1:
        if params.train_dataset in ['omniglot', 'cross_char']:
            params.stop_epoch = 5
        elif params.train_dataset in ['CUB']:
            # Differs from the open-review paper: 400 epochs over-fits here.
            params.stop_epoch = 200
        elif params.train_dataset in ['miniImagenet', 'cross']:
            params.stop_epoch = 300
        else:
            params.stop_epoch = 300
    shake_config = {
        'shake_forward': params.shake_forward,
        'shake_backward': params.shake_backward,
        'shake_picture': params.shake_picture
    }
    train_param = {
        'loss_type': params.train_loss_type,
        'temperature': params.train_temperature,
        'margin': params.train_margin,
        'lr': params.train_lr,
        'shake': params.shake,
        'shake_config': shake_config,
        'episodic': params.episodic,
        'num_way': params.train_num_way,
        'num_shot': params.train_num_shot,
        'num_query': params.train_num_query,
        'num_classes': params.num_classes
    }
    test_param = {
        'loss_type': params.test_loss_type,
        'temperature': params.test_temperature,
        'margin': params.test_margin,
        'lr': params.test_lr,
        'num_way': params.test_num_way,
        'num_shot': params.test_num_shot,
        'num_query': params.test_num_query
    }
    model = Baseline(model_dict[params.model], params.entropy, train_param,
                     test_param)
    model = model.cuda()
    key = params.tag
    writer = SummaryWriter(log_dir=os.path.join(params.vis_log, key))
    params.checkpoint_dir = '%s/checkpoints/%s/%s' % (
        configs.save_dir, params.train_dataset, params.checkpoint_dir)
    if not os.path.isdir(params.vis_log):
        os.makedirs(params.vis_log)
    outfile_template = os.path.join(
        params.checkpoint_dir.replace("checkpoints", "features"), "%s.hdf5")
    if params.mode == 'train' and not os.path.isdir(params.checkpoint_dir):
        os.makedirs(params.checkpoint_dir)
    if params.resume or params.mode == 'test':
        if params.mode == 'test':
            # Test mode: load the best checkpoint into a bare backbone used
            # for feature extraction.
            self.feature_model = model_dict[params.model]().cuda()
            resume_file = get_best_file(params.checkpoint_dir)
            tmp = torch.load(resume_file)
            state = tmp['state']
            state_keys = list(state.keys())
            # Keep only backbone weights: 'feature.trunk.xx' -> 'trunk.xx'.
            # BUGFIX: the loop variable was named `key`, clobbering the
            # `key = params.tag` above so self.key ended up holding the last
            # checkpoint key instead of the run tag.
            for state_key in state_keys:
                if "feature." in state_key:
                    newkey = state_key.replace("feature.", "")
                    state[newkey] = state.pop(state_key)
                else:
                    state.pop(state_key)
            self.feature_model.load_state_dict(state)
            self.feature_model.eval()
        else:
            # Resume training from the latest checkpoint.
            resume_file = get_resume_file(params.checkpoint_dir)
            tmp = torch.load(resume_file)
            state = tmp['state']
            model.load_state_dict(state)
            params.start_epoch = tmp['epoch'] + 1
        print('Info: Model loaded!!!')
    self.params = params
    self.val_file = val_file
    self.base_file = base_file
    self.image_size = image_size
    self.optimizer = optimizer
    self.outfile_template = outfile_template
    self.novel_loader = novel_loader
    self.base_loader = base_loader
    self.val_loader = val_loader
    self.writer = writer
    self.model = model
    self.key = key