def main():
    """Print the five most-biased answers for the 'what color is' question type.

    Loads the train/eval splits for the chosen dataset, computes the
    per-question-type answer prior with get_bias, and prints the top-5
    probabilities and their answer words.
    """
    args = parse_args()
    dataset = args.dataset
    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train', dictionary, dataset=dataset,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, dataset=dataset,
                                  cache_image_features=args.cache_features)

    label2answer = eval_dset.label2ans
    bias_p = get_bias(train_dset, eval_dset)
    color_bias = bias_p['what color is']
    # Indices of the five largest bias values, highest first.
    top5 = color_bias.argsort()[::-1][0:5]
    top5_probs = [color_bias[i] for i in top5]
    top5_words = [label2answer[i] for i in top5]
    print(top5_probs)
    print(top5_words)
def main():
    """Evaluate a saved CCB model on the eval split, reporting per-type scores."""
    args = parse_args()
    dataset = args.dataset

    # question_id -> question type, used by evaluate() to break down scores.
    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)

    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, dataset=dataset,
                                  cache_image_features=args.cache_features)

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(CCB_model, constructor)(eval_dset, args.num_hid).cuda()
    #model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()

    # Attach the debiasing objective chosen on the command line.
    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == "CCB_loss":
        model.debias_loss_fn = CCB_loss(args.entropy_penalty)
    else:
        # BUG FIX: report the value that was actually dispatched on
        # (args.debias); the old code raised with args.mode, which this
        # parser may not define, turning a bad flag into an AttributeError.
        raise RuntimeError(args.debias)

    # Restore the trained weights.
    model_state = torch.load(args.model_state)
    model.load_state_dict(model_state)
    model = model.cuda()

    batch_size = args.batch_size
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=5)
    print("Starting eval...")
    evaluate(model, eval_loader, qid2type)
def main(args):
    """Evaluate a trained question classifier on the val and test splits."""
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    net = Question_Classifier(args.bert_mode, args.bert_pretrain, num_classes=3)
    net.load_state_dict(
        torch.load(args.load_path, map_location=lambda storage, loc: storage))
    torch.cuda.set_device(device=0)
    net.cuda()

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    valset = Question_Dataset('val', dictionary, args.data_root,
                              question_len=12)
    testset = Question_Dataset('test', dictionary, args.data_root,
                               question_len=12)
    valloader = DataLoader(valset, batch_size=args.batch_size,
                           shuffle=False, num_workers=2)
    testloader = DataLoader(testset, batch_size=args.batch_size,
                            shuffle=False, num_workers=2)

    net.eval()

    def _split_accuracy(loader, n_questions):
        # Batch-size-weighted accuracy averaged over the whole split.
        total = 0.0
        for batch in loader:
            question = batch['question'].cuda()
            label = batch['label'].cuda()
            logits = net.forward(question)
            total += utils.cal_acc(logits, label) * question.shape[0]
        return total / n_questions

    with torch.no_grad():
        val_acc = _split_accuracy(valloader, len(valset))
        test_acc = _split_accuracy(testloader, len(testset))

    print('valset || questions: %d acc: %.4f' % (len(valset), val_acc))
    print('testset || questions: %d acc: %.4f' % (len(testset), test_acc))
def main():
    """Save a trained model's VQA-CP eval-split predictions to a JSON file.

    Loads `<model_dir>/model.pth`, strips DataParallel "module." prefixes and
    training-only debias-loss parameters from the state dict, runs inference,
    and writes [{answer, question_id}, ...] into the model directory.
    """
    parser = argparse.ArgumentParser(
        "Save a model's predictions for the VQA-CP test set")
    parser.add_argument("model", help="Directory of the model")
    parser.add_argument("output_file", help="File to write json output to")
    args = parser.parse_args()
    path = args.model

    print("Loading data...")
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary, cp=True)
    eval_dset = VQAFeatureDataset('val', dictionary, cp=True)
    eval_loader = DataLoader(eval_dset, 256, shuffle=False, num_workers=0)

    constructor = 'build_%s' % 'baseline0_newatt'
    model = getattr(base_model, constructor)(train_dset, 1024).cuda()

    print("Loading state dict for %s..." % path)
    state_dict = torch.load(join(path, "model.pth"))
    # Checkpoints saved via nn.DataParallel prefix every key with "module.".
    if all(k.startswith("module.") for k in state_dict):
        state_dict = {k[len("module."):]: v for k, v in state_dict.items()}
    # Debias-loss parameters are only needed during training; drop them.
    for k in list(state_dict):
        if k.startswith("debias_loss_fn"):
            del state_dict[k]

    model.load_state_dict(state_dict)
    model.cuda()
    model.eval()
    print("Done")

    predictions = []
    # torch.no_grad() replaces the long-deprecated
    # Variable(..., volatile=True) inference idiom.
    with torch.no_grad():
        for v, q, a, b in tqdm(eval_loader, ncols=100,
                               total=len(eval_loader), desc="eval"):
            v = v.cuda()
            q = q.cuda()
            factor = model(v, None, q, None, None, True)[0]
            prediction = torch.max(factor, 1)[1].data.cpu().numpy()
            for p in prediction:
                predictions.append(train_dset.label2ans[p])

    out = []
    for p, e in zip(predictions, eval_dset.entries):
        out.append(dict(answer=p, question_id=e["question_id"]))
    with open(join(path, args.output_file), "w") as f:
        json.dump(out, f)
def evalFromImages(args):
    """Evaluate the combined image+question model from raw val2014 images."""
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print("Fetching eval data")
    imageLoader = imageModel.ImageLoader("data/val2014img", "val")
    eval_dset = VQAFeatureDataset('valSample', args.evalset_name, dictionary,
                                  imageLoader=imageLoader)

    # Fetch model.
    model = imageModel.getCombinedModel(args, eval_dset)
    model = nn.DataParallel(model).cuda()

    # Evaluate.
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print("Evaluating...")
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    # CONSISTENCY FIX: replaced Python-2 print statements with print()
    # calls (single pre-formatted argument, so the form is valid under
    # both Python 2 and 3), matching the rest of this codebase.
    print("eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound))
def evalNormal(args):
    """Evaluate a feature-based VQA model (optionally restored from disk)."""
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print("Fetching eval data")
    eval_dset = VQAFeatureDataset('val', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print("Loading model from {}".format(load_path))
        model.load_state_dict(torch.load(load_path))

    # Evaluate.
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print("Evaluating...")
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    # CONSISTENCY FIX: Python-2 print statements converted to print()
    # calls (single argument, valid in both Python 2 and 3) to match the
    # rest of this codebase.
    print("eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound))
def trainNormal(args):
    """Train a feature-based VQA model, optionally resuming from a checkpoint."""
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print("Fetching train data")
    train_dset = VQAFeatureDataset('train', 'train', dictionary)
    print("Fetching eval data")
    eval_dset = VQAFeatureDataset('valSample', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        # CONSISTENCY FIX: Python-2 print statements converted to print()
        # calls (single argument, valid in both Python 2 and 3) to match
        # the rest of this codebase.
        print("Loading model from {}".format(load_path))
        model.load_state_dict(torch.load(load_path))

    # Train.
    train_loader = DataLoader(train_dset, args.batch_size, shuffle=True)
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    train.train(model, train_loader, eval_loader, args.epochs, args.output)
    # NOTE(review): this chunk begins inside create_glove_embedding_init;
    # the enclosing `def` line is outside this view.
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    # A GloVe line is "<word> v1 v2 ... vD": dim = token count minus the word.
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            # Words absent from GloVe keep their zero-initialised row.
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    # Build and persist the dictionary, then derive the GloVe-initialised
    # embedding matrix for its vocabulary.
    d = create_dictionary(config.data_path)
    d.dump_to_file('./data/dictionary.pkl')

    d = Dictionary.load_from_file('./data/dictionary.pkl')
    emb_dim = 300
    #glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    # NOTE(review): os.listdir(...)[2] depends on directory-listing order and
    # breaks if the glove folder's contents change — confirm this index.
    glove_file = os.path.join(config.data_glove_path,
                              os.listdir(config.data_glove_path)[2])
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
    # Tail of parse_args(); the ArgumentParser construction is outside this view.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    print(
        'Evaluate a given model optimized by training split using validation split.'
    )
    args = parse_args()
    torch.backends.cudnn.benchmark = True

    # Select the evaluation routine and dataset for the requested task.
    if args.task == 'vqa':
        from train import evaluate
        dict_path = 'data/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        eval_dset = VQAFeatureDataset('val', dictionary, adaptive=True)
    elif args.task == 'flickr':
        from train_flickr import evaluate
        dict_path = 'data/flickr30k/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        eval_dset = Flickr30kFeatureDataset('test', dictionary)
        # Flickr evaluation does not use these model hyper-parameters.
        # NOTE(review): indentation reconstructed — confirm these two lines
        # belong inside the flickr branch.
        args.op = ''
        args.gamma = 1

    # Scale the effective batch size by the number of GPUs present.
    n_device = torch.cuda.device_count()
    batch_size = args.batch_size * n_device

    constructor = 'build_%s' % args.model
    # NOTE(review): this constructor call continues past the end of this chunk.
    model = getattr(base_model, constructor)(eval_dset, args.num_hid, args.op,
def create_glove_embedding_init(idx2word, glove_file):
    """Build an embedding matrix for `idx2word` initialised from GloVe.

    Args:
        idx2word: sequence of vocabulary words; position = embedding row.
        glove_file: path to a GloVe text file ("word v1 v2 ... vD" per line).

    Returns:
        (weights, word2emb): a (len(idx2word), emb_dim) float32 matrix whose
        rows for words absent from GloVe stay zero, and the full
        word -> vector dict parsed from the file.
    """
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    # A GloVe line is "<word> v1 v2 ... vD": dim = token count minus the word.
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        # CLEANUP: removed the commented-out `map(float, ...)` leftover.
        word2emb[word] = np.array([float(v) for v in vals[1:]])
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            # No GloVe vector: leave the zero-initialised row in place.
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    # Build and persist the dictionary, then save the GloVe-initialised
    # embedding matrix for its vocabulary.
    d = create_dictionary('vqa_data')
    d.dump_to_file('vqa_data/dictionary.pkl')

    d = Dictionary.load_from_file('vqa_data/dictionary.pkl')
    emb_dim = 300
    glove_file = 'vqa_data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('vqa_data/glove6b_init_%dd.npy' % emb_dim, weights)
    # Tail of parse_args(); earlier argument handling is outside this view.
    else:
        args.combine_with_dataroot = None
    if args.combine_with_splits is not None:
        # Comma-separated CLI value -> list of split names.
        args.combine_with_splits = args.combine_with_splits.split(",")
    args.emb_dim = 300
    args.out_dictionary_json_file = args.dataroot + '/bottom-up-attention/combined_and_individual.json'
    return args


if __name__ == '__main__':
    args = parse_args()
    dataroot = args.dataroot
    # Optionally seed the new dictionary from a previously saved one.
    if args.old_dictionary_file is not None:
        old_dictionary = Dictionary.load_from_file(args.old_dictionary_file)
    else:
        old_dictionary = None
    d = create_dictionary(dataroot, args.dataset,
                          old_dictionary=old_dictionary, args=args)
    with open(os.path.join(dataroot, 'bottom-up-attention',
                           'dictionary.json')) as f:
        combined_dict = json.load(f)
    combined_word_to_ix = combined_dict['word_to_ix']
    # NOTE(review): these mappings are filled in code past the end of this chunk.
    idx2word = {}
    combined_ix_to_curr_ix = {}
def main():
    """Compute per-question-type answer priors ("bias"), attach them to every
    train/eval example, and build the debiased model.

    The bias for a question type is the mean answer-score vector over all
    training examples of that type.
    """
    args = parse_args()
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    cp = not args.nocp
    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train', dictionary, cp=cp,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, cp=cp,
                                  cache_image_features=args.cache_features)
    answer_voc_size = train_dset.num_ans_candidates

    # Compute the bias:
    # The bias here is just the expected score for each answer/question type

    # question_type -> answer -> total score
    question_type_to_probs = defaultdict(Counter)

    # question_type -> num_occurances
    print("# question_type -> num_occurances")
    question_type_to_count = Counter()
    for ex in train_dset.entries:
        ans = ex["answer"]
        q_type = ans["question_type"]
        question_type_to_count[q_type] += 1
        if ans["labels"] is not None:
            for label, score in zip(ans["labels"], ans["scores"]):
                question_type_to_probs[q_type][label] += score

    # Normalise the accumulated scores by type frequency.
    question_type_to_prob_array = {}
    for q_type, count in question_type_to_count.items():
        prob_array = np.zeros(answer_voc_size, np.float32)
        for label, total_score in question_type_to_probs[q_type].items():
            prob_array[label] += total_score
        prob_array /= count
        question_type_to_prob_array[q_type] = prob_array
    # NOTE(review): this string literal was split across a chunk boundary in
    # the extracted source; reconstructed best-effort.
    print(" ... DONE")

    # Now add a `bias` field to each example
    for ds in [train_dset, eval_dset]:
        for ex in ds.entries:
            q_type = ex["answer"]["question_type"]
            ex["bias"] = question_type_to_prob_array[q_type]

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    print("BUILT MODEL.")

    # Add the loss_fn based our arguments
    if args.mode == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.mode == "none":
        model.debias_loss_fn = Plain()
    elif args.mode == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.mode == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    else:
        raise RuntimeError(args.mode)

    # Record the bias function we are using
    utils.create_dir(args.output)
    with open(args.output + "/debias_objective.json", "w") as f:
        js = model.debias_loss_fn.to_json()
        json.dump(js, f, indent=2)

    model = model.cuda()
    print(sum(p.numel() for p in model.parameters()))
    # NOTE(review): the triple-quote below opens a block that continues past
    # the end of this chunk (presumably commented-out code).
    '''
    # Tail of parse_args(); the ArgumentParser construction is outside this view.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    utils.create_dir(args.output)
    logger = utils.Logger(os.path.join(args.output, 'log.txt'))
    logger.write(args.__repr__())

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary, adaptive=True)
    val_dset = VQAFeatureDataset('val', dictionary, adaptive=True)
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid, args.op,
                                             args.gamma).cuda()
    tfidf = None
    weights = None
    if args.tfidf:
        # NOTE(review): `dict` shadows the builtin; the tfidf_from_questions
        # call continues past the end of this chunk.
        dict = Dictionary.load_from_file('data/dictionary.pkl')
        tfidf, weights = tfidf_from_questions(['train', 'val', 'test2015'],
def create_glove_embedding_init(idx2word, glove_file):
    """Initialise an embedding matrix from a GloVe text file.

    Rows of the returned (len(idx2word), emb_dim) float32 matrix hold the
    GloVe vector of the corresponding word; words without a GloVe entry keep
    an all-zero row. The complete word -> vector mapping parsed from the file
    is returned alongside the matrix.
    """
    word2emb = {}
    with open(glove_file, 'r') as f:
        lines = f.readlines()
    # One word token plus emb_dim float tokens per line.
    emb_dim = len(lines[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for line in lines:
        tokens = line.split(' ')
        word2emb[tokens[0]] = np.array(list(map(float, tokens[1:])))
    for row, word in enumerate(idx2word):
        if word in word2emb:
            weights[row] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    # Build and persist the imSitu dictionary, then save its GloVe-initialised
    # embedding matrix.
    d = create_dictionary('data')
    d.dump_to_file('data/dictionary_imsitu_final.pkl')

    d = Dictionary.load_from_file('data/dictionary_imsitu_final.pkl')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_imsitu_final_%dd.npy' % emb_dim, weights)
    # Tail of parse_args(); the ArgumentParser construction is outside this view.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    if not torch.cuda.is_available():
        raise ValueError("CUDA is not available," +
                         "this code currently only support GPU.")
    n_device = torch.cuda.device_count()
    print("Found %d GPU cards for eval" % (n_device))
    device = torch.device("cuda")

    dictionary = Dictionary.load_from_file(
        os.path.join(args.data_folder, 'glove/dictionary.pkl'))

    # Restore the hyper-parameters the model was trained with.
    hps_file = f'{args.output_folder}/hps.json'
    model_hps = Struct(json.load(open(hps_file)))
    # Scale the effective batch size by the number of GPUs.
    batch_size = model_hps.batch_size*n_device

    print("Evaluating on %s dataset with model trained on %s dataset" %
          (args.dataset, model_hps.dataset))

    if args.dataset == "vqa_cp":
        # NOTE(review): the second loader call continues past this chunk.
        coco_train_features = Image_Feature_Loader(
            'train', model_hps.relation_type,
            adaptive=model_hps.adaptive, dataroot=model_hps.data_folder)
        coco_val_features = Image_Feature_Loader(
            'val', model_hps.relation_type, adaptive=model_hps.adaptive,
def main():
    """Train a CCB model with the selected debiasing objective on a VQA split."""
    args = parse_args()
    dataset = args.dataset
    args.output = os.path.join('logs', args.output)

    # Create the experiment directory; if it exists, ask before erasing it.
    if not os.path.isdir(args.output):
        utils.create_dir(args.output)
    else:
        # BUG FIX: `default=False` belongs to click.confirm, not str.format
        # (where it was silently ignored as an unused keyword argument).
        if click.confirm(
                'Exp directory already exists in {}. Erase?'.format(args.output),
                default=False):
            os.system('rm -r ' + args.output)
            utils.create_dir(args.output)
        else:
            os._exit(1)

    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train', dictionary, dataset=dataset,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, dataset=dataset,
                                  cache_image_features=args.cache_features)
    # Attach per-question-type priors to the datasets.
    get_bias(train_dset, eval_dset)

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(CCB_model, constructor)(train_dset, args.num_hid).cuda()  # or base_model
    if dataset == 'cpv1':
        model.w_emb.init_embedding('data/glove6b_init_300d_v1.npy')
    elif dataset == 'cpv2' or dataset == 'v2':
        model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    # Add the loss_fn based our arguments
    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == 'focal':
        model.debias_loss_fn = Focal()
    elif args.debias == 'CCB_loss':
        model.debias_loss_fn = CCB_loss(args.entropy_penalty)
    else:
        # BUG FIX: reference args.debias (the value actually dispatched on),
        # not args.mode, which this parser may not define.
        raise RuntimeError(args.debias)

    # question_id -> question type, consumed by train() for per-type metrics.
    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)

    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=5)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=5)

    print("Starting training...")
    train(model, train_loader, eval_loader, args, qid2type)
def main(args):
    """Train the BERT question classifier on three training folds.

    Each epoch draws batches from three train loaders in lockstep, averages
    their cross-entropy losses for one SGD update, then reports val/test
    accuracy, periodically checkpoints the network, and applies a poly
    learning-rate decay by rebuilding the optimizer.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    net = Question_Classifier(args.bert_mode, args.bert_pretrain, num_classes=3)

    # Pick the run directory: resume into the latest run_*, otherwise start a
    # new one; an explicit --run_id overrides both.
    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    runs = sorted(
        glob.glob(os.path.join(save_dir_root, 'run', args.train_fold, 'run_*')))
    if args.resume_epoch != 0:
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0
    if args.run_id >= 0:
        run_id = args.run_id
    save_dir = os.path.join(save_dir_root, 'run', args.train_fold,
                            'run_' + str(run_id))

    # BUG FIX: the timestamp format was '%b%d_%H-%M-%M%S' (minute repeated,
    # seconds mangled); use hour-minute-second.
    log_dir = os.path.join(
        save_dir,
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)
    logger = open(os.path.join(save_dir, 'log.txt'), 'w')
    logger.write(
        'optim: SGD \nlr=%.4f\nweight_decay=%.4f\nmomentum=%.4f\nupdate_lr_every=%d\nseed=%d\n'
        % (args.lr, args.weight_decay, args.momentum, args.update_lr_every,
           args.seed))
    if not os.path.exists(os.path.join(save_dir, 'models')):
        os.makedirs(os.path.join(save_dir, 'models'))

    if args.resume_epoch == 0:
        print('Training from scratch...')
    else:
        net_resume_path = os.path.join(
            save_dir, 'models',
            'mcnet_epoch-' + str(args.resume_epoch - 1) + '.pth')
        # BUG FIX: `resume_epoch` was an undefined bare name here; use
        # args.resume_epoch.
        print('Initializing weights from: {}, epoch: {}...'.format(
            save_dir, args.resume_epoch))
        net.load_state_dict(
            torch.load(net_resume_path,
                       map_location=lambda storage, loc: storage))
    torch.cuda.set_device(device=0)
    net.cuda()

    net_optim = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    trainset0 = Question_Dataset('train0', dictionary, args.data_root,
                                 question_len=12)
    trainset1 = Question_Dataset('train1', dictionary, args.data_root,
                                 question_len=12)
    trainset2 = Question_Dataset('train2', dictionary, args.data_root,
                                 question_len=12)
    valset = Question_Dataset('val', dictionary, args.data_root,
                              question_len=12)
    testset = Question_Dataset('test', dictionary, args.data_root,
                               question_len=12)
    trainloader0 = DataLoader(trainset0, batch_size=args.batch_size,
                              shuffle=True, num_workers=2)
    trainloader1 = DataLoader(trainset1, batch_size=args.batch_size,
                              shuffle=True, num_workers=2)
    trainloader2 = DataLoader(trainset2, batch_size=args.batch_size,
                              shuffle=True, num_workers=2)
    valloader = DataLoader(valset, batch_size=args.batch_size,
                           shuffle=False, num_workers=2)
    testloader = DataLoader(testset, batch_size=args.batch_size,
                            shuffle=False, num_workers=2)

    num_iter_tr = len(trainloader0)
    nitrs = args.resume_epoch * num_iter_tr
    nsamples = args.batch_size * nitrs
    print('each_epoch_num_iter: %d' % (num_iter_tr))

    global_step = 0
    epoch_losses = []
    recent_losses = []
    start_t = time.time()
    print('Training Network')

    for epoch in range(args.resume_epoch, args.nepochs):
        net.train()
        epoch_losses = []
        for ii, (sample_batched0, sample_batched1,
                 sample_batched2) in enumerate(
                     zip(trainloader0, trainloader1, trainloader2)):
            question0, label0 = sample_batched0['question'], sample_batched0['label']
            question0, label0 = question0.cuda(), label0.cuda()
            question1, label1 = sample_batched1['question'], sample_batched1['label']
            question1, label1 = question1.cuda(), label1.cuda()
            question2, label2 = sample_batched2['question'], sample_batched2['label']
            question2, label2 = question2.cuda(), label2.cuda()
            global_step += args.batch_size

            out0 = net.forward(question0)
            out1 = net.forward(question1)
            out2 = net.forward(question2)
            loss0 = utils.CELoss(logit=out0, target=label0, reduction='mean')
            loss1 = utils.CELoss(logit=out1, target=label1, reduction='mean')
            loss2 = utils.CELoss(logit=out2, target=label2, reduction='mean')
            # Average the three fold losses into a single update.
            loss = (loss0 + loss1 + loss2) / 3

            trainloss = loss.item()
            epoch_losses.append(trainloss)
            # Rolling window of the last `log_every` batch losses.
            if len(recent_losses) < args.log_every:
                recent_losses.append(trainloss)
            else:
                recent_losses[nitrs % len(recent_losses)] = trainloss

            net_optim.zero_grad()
            loss.backward()
            net_optim.step()
            nitrs += 1
            nsamples += args.batch_size

            if nitrs % args.log_every == 0:
                meanloss = sum(recent_losses) / len(recent_losses)
                print('epoch: %d ii: %d trainloss: %.2f timecost:%.2f secs' %
                      (epoch, ii, meanloss, time.time() - start_t))
                writer.add_scalar('data/trainloss', meanloss, nsamples)

        # validation
        net.eval()
        val_acc = 0.0
        test_acc = 0.0
        with torch.no_grad():  # consistent with the eval script; saves memory
            for ii, sample_batched in enumerate(valloader):
                question, label = sample_batched['question'], sample_batched['label']
                question, label = question.cuda(), label.cuda()
                out = net.forward(question)
                tmp_acc = utils.cal_acc(out, label)
                val_acc += (tmp_acc * question.shape[0])
            val_acc /= len(valset)
            # BUG FIX: the test metric previously iterated `valloader`,
            # silently reporting validation data as test accuracy.
            for ii, sample_batched in enumerate(testloader):
                question, label = sample_batched['question'], sample_batched['label']
                question, label = question.cuda(), label.cuda()
                out = net.forward(question)
                tmp_acc = utils.cal_acc(out, label)
                test_acc += (tmp_acc * question.shape[0])
            test_acc /= len(testset)

        print('Validation:')
        print('epoch: %d, val_questions: %d val_acc: %.4f' %
              (epoch, len(valset), val_acc))
        print('epoch: %d, test_questions: %d test_acc: %.4f' %
              (epoch, len(testset), test_acc))
        writer.add_scalar('data/valid_acc', val_acc, nsamples)

        if epoch % args.save_every == args.save_every - 1:
            net_save_path = os.path.join(
                save_dir, 'models',
                'question_classifier_epoch-' + str(epoch) + '.pth')
            torch.save(net.state_dict(), net_save_path)
            print("Save net at {}\n".format(net_save_path))

        if epoch % args.update_lr_every == args.update_lr_every - 1:
            # Poly decay: rebuild the optimizer with the new learning rate.
            lr_ = utils.lr_poly(args.lr, epoch, args.nepochs, 0.9)
            print('(poly lr policy) learning rate: ', lr_)
            net_optim = optim.SGD(net.parameters(), lr=lr_,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
def main_worker(gpu, args):
    # Per-process worker: evaluates a BAN model on a PathVQA split and reports
    # accuracy plus BLEU / BLEU-1/2/3 scores of the predicted answers.
    args.gpu = gpu
    # In multi-GPU mode, silence print() on every rank except rank 0.
    if args.multiGPUs and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass
    if args.gpu is not None:
        print('Use GPU: {} for training'.format(args.gpu))
    if args.multiGPUs:
        args.rank = gpu
        setup(args.rank, args.world_size)
        # NOTE(review): indentation reconstructed — confirm this inner block
        # belongs under the multiGPUs branch.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            args.workers = int(
                (args.workers + args.world_size - 1) / args.world_size)

    # prepare data
    if args.task == 'pvqa':
        dict_path = 'data/pvqa/pvqa_dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        test_dset = PVQAFeatureDataset(args.data_split, dictionary,
                                       adaptive=False)
        w_emb_path = 'data/pvqa/glove_pvqa_300d.npy'
    else:
        raise Exception('%s not implemented yet' % args.task)
    if args.task == 'pvqa':
        test_loader = DataLoader(test_dset, args.batch_size, shuffle=False,
                                 num_workers=args.workers, pin_memory=True)

    # prepare model
    model = BanModel(ntoken=test_dset.dictionary.ntoken,
                     num_ans_candidates=test_dset.num_ans_candidates,
                     num_hid=args.num_hid, v_dim=test_dset.v_dim,
                     op=args.op, gamma=args.gamma, qa_bl=args.qa_bl)
    tfidf = None
    weights = None
    model.w_emb.init_embedding(w_emb_path, tfidf, weights)

    if args.multiGPUs:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            args.workers = int(
                (args.workers + args.world_size - 1) / args.world_size)
            model = DDP(model, device_ids=[args.gpu])
        else:
            model.cuda()
            model = DDP(model)
    else:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)

    # load snapshot
    if args.input is not None:
        print('#8')  # NOTE(review): leftover debug marker
        print('loading %s' % args.input)
        if args.gpu is None:
            model_data = torch.load(args.input)
        else:
            loc = 'cuda:{}'.format(args.gpu)
            model_data = torch.load(args.input, map_location=loc)
        # The checkpoint is either a raw state dict or wraps it under the
        # 'model_state' key.
        model_data_sd = model_data.get('model_state', model_data)
        model.load_state_dict(model_data_sd)

    res = evaluate(test_loader, model, args)
    eval_score = res['eval_score']
    preds = res['preds']
    anss = res['anss']

    b_scores = []
    b_scores_1 = []
    b_scores_2 = []
    b_scores_3 = []
    assert len(preds) == len(anss), 'len(preds)=%d, len(anss)=%d' % (
        len(preds), len(anss))
    for i in range(len(preds)):
        pred_ans = test_dset.label2ans[preds[i]]
        gt_ans = test_dset.entries[i]['ans_sent']
        # Cumulative BLEU (default weights) plus individual 1/2/3-gram scores,
        # computed on lower-cased whitespace tokens.
        b_score = sentence_bleu(references=[str(gt_ans).lower().split()],
                                hypothesis=str(pred_ans).lower().split())
        b_score_1 = sentence_bleu(references=[str(gt_ans).lower().split()],
                                  hypothesis=str(pred_ans).lower().split(),
                                  weights=(1, 0, 0, 0))
        b_score_2 = sentence_bleu(references=[str(gt_ans).lower().split()],
                                  hypothesis=str(pred_ans).lower().split(),
                                  weights=(0, 1, 0, 0))
        b_score_3 = sentence_bleu(references=[str(gt_ans).lower().split()],
                                  hypothesis=str(pred_ans).lower().split(),
                                  weights=(0, 0, 1, 0))
        b_scores.append(b_score)
        b_scores_1.append(b_score_1)
        b_scores_2.append(b_score_2)
        b_scores_3.append(b_score_3)
    b_score_m = np.mean(b_scores)
    b_score_m_1 = np.mean(b_scores_1)
    b_score_m_2 = np.mean(b_scores_2)
    b_score_m_3 = np.mean(b_scores_3)
    b_score_info = 'bleu score=%.4f\n' % b_score_m
    b_score_info_1 = 'bleu1 score=%.4f\n' % b_score_m_1
    b_score_info_2 = 'bleu2 score=%.4f\n' % b_score_m_2
    b_score_info_3 = 'bleu3 score=%.4f' % b_score_m_3
    print(b_score_info)
    print(b_score_info_1)
    print(b_score_info_2)
    print(b_score_info_3)
    # Append the summary to the shared results file.
    with open(os.path.join(args.output, 'type_result.txt'), 'a') as f:
        f.write(b_score_info)
        f.write(b_score_info_1)
        f.write(b_score_info_2)
        f.write(b_score_info_3)
    # Tail of parse_args(); the ArgumentParser is created outside this view.
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str, default='baseline0_newatt')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    batch_size = args.batch_size

    # Build the requested model variant and initialise word embeddings
    # from GloVe.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()

    train_loader = DataLoader(train_dset, batch_size, shuffle=True,
                              num_workers=1)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=True,
                             num_workers=1)
    train(model, train_loader, eval_loader, args.epochs, args.output)
    # Tail of parse_args(); the ArgumentParser construction is outside this view.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # NOTE(review): experiment script with CLI parsing disabled — seeds,
    # batch size and model variant are hard-coded; the commented lines show
    # the original configurable versions.
    #args = parse_args()
    #torch.manual_seed(args.seed)
    torch.manual_seed(1111)
    #torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed(1111)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file(
        os.path.join(saved_data_path, 'dictionary.pkl'))
    train_dset = VQAFeatureDataset_Relation(name='train', dictionary=dictionary)
    #eval_dset = VQAFeatureDataset3('val', dictionary)
    #test_dset = VQAFeatureDataset('test', dictionary)
    #batch_size = args.batch_size
    batch_size = 512

    #constructor = 'build_%s' % args.model
    #constructor = 'build_%s' % 'baseline0_newatt'
    constructor = 'build_%s' % 'baseline0_both_guided_newatt'
    #model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    # NOTE(review): this chunk ends here; the script continues past this view.
    model = getattr(final_base_model, constructor)(train_dset, 1024).cuda()
def main():
    """Train a debiased VQA model.

    Loads the dictionary and train/val feature datasets, computes a
    per-question-type answer-prior ("bias") from the training answers,
    attaches it to every example, builds the model, selects the debiasing
    loss from ``args.mode``, and runs training.
    """
    args = parse_args()
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    # cp = use the changing-priors (VQA-CP) split unless --nocp was given
    cp = not args.nocp

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train', dictionary, cp=cp,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, cp=cp,
                                  cache_image_features=args.cache_features)

    answer_voc_size = train_dset.num_ans_candidates

    # Compute the bias:
    # The bias here is just the expected score for each answer/question type

    # question_type -> answer -> total score
    question_type_to_probs = defaultdict(Counter)

    # question_type -> num_occurances
    question_type_to_count = Counter()
    for ex in train_dset.entries:
        ans = ex["answer"]
        q_type = ans["question_type"]
        question_type_to_count[q_type] += 1
        # Some entries carry no labels; they still count toward the
        # question-type total, diluting the prior.
        if ans["labels"] is not None:
            for label, score in zip(ans["labels"], ans["scores"]):
                question_type_to_probs[q_type][label] += score

    # Normalize accumulated scores by the number of occurrences of the
    # question type, giving the expected score per answer label.
    question_type_to_prob_array = {}
    for q_type, count in question_type_to_count.items():
        prob_array = np.zeros(answer_voc_size, np.float32)
        for label, total_score in question_type_to_probs[q_type].items():
            prob_array[label] += total_score
        prob_array /= count
        question_type_to_prob_array[q_type] = prob_array

    # Now add a `bias` field to each example
    # NOTE(review): eval entries are assumed to only contain question types
    # seen in training — an unseen type would raise KeyError here.
    for ds in [train_dset, eval_dset]:
        for ex in ds.entries:
            q_type = ex["answer"]["question_type"]
            ex["bias"] = question_type_to_prob_array[q_type]

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    # Add the loss_fn based our arguments
    if args.mode == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.mode == "none":
        model.debias_loss_fn = Plain()
    elif args.mode == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.mode == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    else:
        raise RuntimeError(args.mode)

    # Record the bias function we are using
    utils.create_dir(args.output)
    with open(args.output + "/debias_objective.json", "w") as f:
        js = model.debias_loss_fn.to_json()
        json.dump(js, f, indent=2)

    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # The original version uses multiple workers, but that just seems slower on my setup
    train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=0)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=0)

    print("Starting training...")
    train(model, train_loader, eval_loader, args.epochs, args.output, args.eval_each_epoch)
# utils.create_dir(args.output) # if 0 <= args.epoch: # model_label += '_epoch%d' % args.epoch # with open(args.output+'/%s_%s.json' \ # % (args.split, model_label), 'w') as f: # json.dump(results, f) # process(args, model, eval_loader) if __name__ == '__main__': args = parse_args() torch.backends.cudnn.benchmark = True dictionary = Dictionary.load_from_file('ban-vqa-demo/data/dictionary.pkl') ans2label_path = os.path.join('ban-vqa-demo/data/cache', 'trainval_ans2label.pkl') label2ans_path = os.path.join('ban-vqa-demo/data/cache', 'trainval_label2ans.pkl') ans2label = pkl.load(open(ans2label_path, 'rb')) label2ans = pkl.load(open(label2ans_path, 'rb')) num_ans_candidates = len(ans2label) eval_dset = VQAFeatureDataset_Custom(dictionary, len(ans2label), adaptive=True) print(ans2label) n_device = torch.cuda.device_count() batch_size = args.batch_size * n_device
results = [] for i in range(logits.size(0)): result = {} result['types'] = types[i] result['question_id'] = qIds[i] result['answer'] = get_answer(logits[i], dataloader) results.append(result) return results if __name__ == '__main__': args = parse_args() torch.backends.cudnn.benchmark = True dictionary = Dictionary.load_from_file('data/cocodictionary.pkl') eval_dset = COCOFeatureDataset(args.split, dictionary, adaptive=False) n_device = torch.cuda.device_count() batch_size = args.batch_size * n_device constructor = 'build_%s' % args.model model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda() eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=1, collate_fn=utils.trim_collate) def process(args, model, eval_loader): model_path = args.input + '/model%s.pth' % \
def save_results(results, savedir): path_rslt = os.path.join(savedir, 'results.json') with open(path_rslt, 'w') as handle: json.dump(results, handle) if __name__ == '__main__': args = parse_args() torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) torch.backends.cudnn.benchmark = True #q_dict = Dictionary.load_from_file('data/question_dictionary.pkl') #c_dict = Dictionary.load_from_file('data/caption_dictionary.pkl') q_dict = Dictionary.load_from_file('data/VQAE/question_dictionary.pkl') c_dict = Dictionary.load_from_file('data/VQAE/explain_dictionary.pkl') #train_dset = VQAFeatureDataset('train', q_dict, c_dict, args.att_thr) #eval_dset = VQAFeatureDataset('val', q_dict, c_dict, args.att_thr) train_dset = VQAEDataset('train', q_dict, c_dict, 'cache/VQAE2') eval_dset = VQAEDataset('val', q_dict, c_dict, 'cache/VQAE2') #train_dset = VQAEVQA2Dataset('train', q_dict, c_dict, 'cache') #eval_dset = VQAEVQA2Dataset('val', q_dict, c_dict, 'cache') batch_size = args.batch_size constructor = 'build_%s' % args.model model = utils.factory(constructor, train_dset, args.num_hid, args.att_dim, args.decode_dim).cuda() model_path = os.path.join(args.output, 'model.pth')
total_number += 1 else: print('Hahahahahahahahahahaha') score = score / len(dataloader.dataset) V_loss /= len(dataloader.dataset) score_yesno /= total_yesno score_other /= total_other score_number /= total_number return score, score_yesno, score_other, score_number if __name__ == '__main__': opt = opts.parse_opt() dictionary = Dictionary.load_from_file(f'{opts.data_dir}/dictionary.pkl') opt.ntokens = dictionary.ntoken model = Model_explain2(opt) model = model.cuda() model = nn.DataParallel(model).cuda() # model = model.cuda() eval_dset = GraphQAIMGDataset('v2cp_test', dictionary, opt) eval_loader = DataLoader(eval_dset, opt.batch_size, shuffle=False, num_workers=0) states_ = torch.load('saved_models/%s/model-best.pth'%opt.load_model_states) states = model.state_dict() for k in states_.keys(): if k in states: states[k] = states_[k]
json.dump(results, handle) if __name__ == '__main__': args = parse_args() args.output = args.output + '_' + str(args.temperature) share_qe_dict = False vocab_source = 'VQAE' #### 'VQAE' or 'VQAv2' torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) torch.backends.cudnn.benchmark = True if share_qe_dict: qe_dict = Dictionary.load_from_file( os.path.join('data', vocab_source, 'question_explain_dictionary.pkl')) else: q_dict = Dictionary.load_from_file( os.path.join('data', vocab_source, 'question_dictionary.pkl')) c_dict = Dictionary.load_from_file( os.path.join('data', vocab_source, 'explain_dictionary.pkl')) #train_dset = VQAFeatureDataset('train', q_dict, c_dict, 'cache/VQAE2',args.att_thr) #eval_dset = VQAFeatureDataset('val', q_dict, c_dict, 'cache/VQAE2',args.att_thr) train_dset = VQAEDataset('train', q_dict, c_dict, 'cache/VQAE2') eval_dset = VQAEDataset('val', q_dict, c_dict, 'cache/VQAE2') #train_dset = VQAEVQA2Dataset('train', q_dict, c_dict, 'cache') #eval_dset = VQAEVQA2Dataset('val', q_dict, c_dict, 'cache') batch_size = args.batch_size
def main(args):
    """Extract image features for open-ended questions and dump them to JSON.

    Loads the question classifier, image network and a resnet34 binary
    classifier, runs them over the evaluation split, and for every
    open-ended question (question-type class 2) appends the image feature
    vector (rounded to 4 decimals) to a per-ground-truth-answer bucket.
    Writes the resulting dict to ``feature_dict.json``.

    Args:
        args: parsed command-line namespace; must provide gpu, model paths,
            dataset paths, batch_size and class counts (see callers).
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    dictionary = Dictionary.load_from_file(args.dictionary_path)
    # gt answer label -> list of rounded feature vectors
    feature_dict = {}

    ques_net = Question_Classifier(args.bert_mode, args.bert_pretrain,
                                   num_classes=args.ques_num_classes)
    img_net = Network(backbone_type=args.backbone_type,
                      num_classes=args.img_num_classes)
    cls_net = models.resnet34(pretrained=False, num_classes=2)

    cls_net = cls_net.cuda()
    ques_net = ques_net.cuda()
    img_net = img_net.cuda()

    cls_net.load_state_dict(torch.load(args.cls2_model_path))
    # map_location keeps the checkpoint on CPU during load
    ques_net.load_state_dict(
        torch.load(args.ques_model_path, map_location=lambda storage, loc: storage))
    img_net.load_model(args.img_model_path)

    eval_dset = VQAFeatureDataset(args.split, dictionary, args.data_root,
                                  question_len=12, clip=True)
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=False, num_workers=2)

    cls_net.eval()
    ques_net.eval()
    img_net.eval()

    with torch.no_grad():
        for v, q, a, ans_type, q_types, image_name in tqdm(iter(eval_loader)):
            v, q, a = v.cuda(), q.cuda(), a.cuda()
            v = v.reshape(v.shape[0], 3, 224, 224)
            q_prob = ques_net(q)  # ques_num_classes
            q_prob = q_prob[0]  # [0: closed-ended-normal, 1: closed-ended-abnormal 2: open-ended]
            q_type = torch.argmax(q_prob)
            v_prob, feature = img_net(v)  # 1 x img_num_classes
            # Only open-ended questions (class 2) contribute features.
            if q_type == 0 or q_type == 1:
                continue
            # Round each feature component to 4 decimals to keep the JSON small.
            rounded = [round(x, 4) for x in feature.cpu().numpy().tolist()]
            gt = torch.argmax(a[0]).item()
            # setdefault replaces the original gt_list bookkeeping, which
            # exactly mirrored feature_dict's keys.
            feature_dict.setdefault(gt, []).append(rounded)

    # Use a context manager so the output file is closed/flushed
    # (json.dump(..., open(...)) leaked the handle).
    with open('feature_dict.json', 'w') as f:
        json.dump(feature_dict, f)
emb_dim = len(entries[0].split(' ')) - 1 print('embedding dim is %d' % emb_dim) weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32) for entry in entries: vals = entry.split(' ') word = vals[0] vals = list(map(float, vals[1:])) word2emb[word] = np.array(vals) for idx, word in enumerate(idx2word): if word not in word2emb: continue weights[idx] = word2emb[word] return weights, word2emb if __name__ == '__main__': args = parse_args() dataroot = 'data' if args.task == 'vqa' else 'data/flickr30k' dictionary_path = os.path.join(dataroot, 'dictionary.pkl') d = create_dictionary(dataroot, args.task) d.dump_to_file(dictionary_path) d = Dictionary.load_from_file(dictionary_path) emb_dim = 300 glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file) np.save(os.path.join(dataroot, 'glove6b_init_%dd.npy' % emb_dim), weights)
def main_worker(gpu, args):
    """Per-process entry point for (optionally distributed) BAN training on PVQA.

    Sets up the process group when multi-GPU, builds datasets/loaders and the
    BanModel, optionally restores a snapshot from ``args.input``, then runs the
    epoch loop, logging eval scores and saving the best model (rank 0 only).

    Args:
        gpu: GPU index for this worker (or None for default device handling).
        args: parsed namespace (multiGPUs, world_size, workers, task, paths, ...).
    """
    args.gpu = gpu

    # Silence print() on non-zero ranks so only rank 0 logs.
    if args.multiGPUs and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print('Use GPU: {} for training'.format(args.gpu))

    if args.multiGPUs:
        args.rank = gpu
        setup(args.rank, args.world_size)
        # NOTE(review): nesting of this inner `if` under the multiGPUs branch
        # is reconstructed from the distributed-setup context — confirm.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            # Split dataloader workers across the world (ceil division).
            args.workers = int((args.workers + args.world_size - 1) / args.world_size)

    # prepare data
    if args.task == 'pvqa':
        dict_path = 'data/pvqa/pvqa_dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        train_dset = PVQAFeatureDataset(args.train, dictionary, adaptive=False)
        val_dset = PVQAFeatureDataset(args.val, dictionary, adaptive=False)
        w_emb_path = 'data/pvqa/glove_pvqa_300d.npy'
    else:
        raise Exception('%s not implemented yet' % args.task)

    if args.multiGPUs:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dset)
    else:
        train_sampler = None

    if args.task == 'pvqa':
        # shuffle=False because the DistributedSampler (when present) handles
        # shuffling; eval order is fixed.
        train_loader = DataLoader(train_dset, args.batch_size, shuffle=False,
                                  num_workers=args.workers, pin_memory=True,
                                  sampler=train_sampler)
        eval_loader = DataLoader(val_dset, args.batch_size, shuffle=False,
                                 num_workers=args.workers, pin_memory=True)

    # prepare model
    model = BanModel(ntoken=train_dset.dictionary.ntoken,
                     num_ans_candidates=train_dset.num_ans_candidates,
                     num_hid=args.num_hid,
                     v_dim=train_dset.v_dim,
                     op=args.op,
                     gamma=args.gamma)
    tfidf = None
    weights = None
    model.w_emb.init_embedding(w_emb_path, tfidf, weights)

    if args.multiGPUs:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # NOTE(review): args.workers is recomputed here a second time
            # (already divided above when gpu is not None) — likely redundant.
            args.workers = int((args.workers + args.world_size - 1) / args.world_size)
            model = DDP(model, device_ids=[args.gpu])
        else:
            model.cuda()
            model = DDP(model)
    else:
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)

    # load snapshot
    if args.input is not None:
        print('#8')
        print('loading %s' % args.input)
        if args.gpu is None:
            model_data = torch.load(args.input)
        else:
            loc = 'cuda:{}'.format(args.gpu)
            model_data = torch.load(args.input, map_location=loc)
        # Checkpoint may be either a raw state dict or a dict wrapping one
        # under 'model_state'.
        model_data_sd = model_data.get('model_state', model_data)
        # Copy matching parameters by name (tolerates missing keys).
        for name, param in model.named_parameters():
            if name in model_data_sd:
                param.data = model_data_sd[name]
        # optimizer = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()))
        # optimizer.load_state_dict(model_data.get('optimizer_state', model_data))
        args.start_epoch = model_data['epoch'] + 1

    optimizer = torch.optim.Adamax(
        filter(lambda p: p.requires_grad, model.parameters()))

    best_eval_score = 0
    for epoch in range(args.start_epoch, args.epochs):
        if args.multiGPUs:
            # Reshuffle shards per epoch so each rank sees different data.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train(train_loader, eval_loader, train_dset, model, optimizer, epoch, args)
        eval_score = evaluate(eval_loader, model, args)

        with open(os.path.join(args.output, 'log.log'), 'a') as f:
            f.write(str(datetime.datetime.now()))
            f.write('epoch=%d' % epoch)
            f.write('eval_score=%.4f' % eval_score)
        print('eval_score=', eval_score)
        print('best eval_score = ', best_eval_score)

        # Only rank 0 (or the single-GPU process) persists the best snapshot.
        if not args.multiGPUs or (args.multiGPUs and args.gpu == 0):
            if eval_score > best_eval_score:
                model_path = os.path.join(args.output, 'model_best.pth')
                utils.save_model(model_path, model, epoch, optimizer)
                best_eval_score = eval_score
def main():
    """imSitu VSRL driver: parse CLI args, build data/model, then train or evaluate.

    Depending on flags, either evaluates on dev (--evaluate), on test (--test),
    or trains the top-down-attention role model, optionally starting from a
    pretrained BUATT checkpoint or resuming a previous run.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1,
                        help="put GPU id > -1 in GPU mode", type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='',
                        help='The model we resume')
    parser.add_argument('--pretrained_buatt_model', type=str, default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--use_pretrained_buatt', action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn', action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir', type=str,
                        help='Location of output from detectron')
    parser.add_argument('--train_file', default="train_new_2000_all.json",
                        type=str, help='trainfile name')
    parser.add_argument('--dev_file', default="dev_new_2000_all.json",
                        type=str, help='dev file name')
    parser.add_argument('--test_file', default="test_new_2000_all.json",
                        type=str, help='test file name')
    parser.add_argument('--model_saving_name', type=str,
                        help='save name of the outpul model')
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str, default='baseline0grid_imsitu_agent')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_iter', type=int, default=1)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    #todo: train role module separately with gt verbs
    args = parser.parse_args()

    clip_norm = 0.25
    n_epoch = args.epochs
    batch_size = args.batch_size
    n_worker = 3

    #dataset_folder = 'imSitu'
    #imgset_folder = 'resized_256'
    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec :, top down att with role q ')

    # Build the role-question encoder from the raw training annotations,
    # then wrap the annotations into the image-loading dataset.
    train_set = json.load(open(dataset_folder + '/' + args.train_file))
    imsitu_roleq = json.load(open("data/imsitu_questions_prev.json"))
    dict_path = 'data/dictionary_imsitu_roleall.pkl'
    dictionary = Dictionary.load_from_file(dict_path)
    w_emb_path = 'data/glove6b_init_imsitu_roleall_300d.npy'
    encoder = imsitu_encoder(train_set, imsitu_roleq, dictionary)
    train_set = imsitu_loader_roleq_buatt_place(
        imgset_folder, train_set, encoder, dictionary, 'train',
        encoder.train_transform)

    # Build the model by name, same pattern as the other drivers in this repo.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(
        train_set, args.num_hid, len(encoder.place_label_list), encoder)
    model.w_emb.init_embedding(w_emb_path)
    #print('MODEL :', model)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    dev_set = json.load(open(dataset_folder + '/' + args.dev_file))
    dev_set = imsitu_loader_roleq_buatt_place(
        imgset_folder, dev_set, encoder, dictionary, 'val',
        encoder.dev_transform)
    dev_loader = torch.utils.data.DataLoader(
        dev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    test_set = json.load(open(dataset_folder + '/' + args.test_file))
    test_set = imsitu_loader_roleq_buatt_place(
        imgset_folder, test_set, encoder, dictionary, 'test',
        encoder.dev_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # NOTE(review): --seed is parsed above but never used here; the seed is
    # hardcoded to 1234 — confirm whether args.seed was meant to apply.
    torch.manual_seed(1234)
    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    if args.use_pretrained_buatt:
        print('Use pretrained from: {}'.format(args.pretrained_buatt_model))
        if len(args.pretrained_buatt_model) == 0:
            raise Exception('[pretrained buatt module] not specified')
        #model_data = torch.load(args.pretrained_ban_model, map_location='cpu')
        #model.load_state_dict(model_data.get('model_state', model_data))
        # Load everything from the BUATT checkpoint except w_emb/classifier.
        utils_imsitu.load_net_ban(args.pretrained_buatt_model, [model],
                                  ['module'], ['w_emb', 'classifier'])
        model_name = 'pre_trained_buatt'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained module] not specified')
        utils_imsitu.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        model_name = 'train_full'
        utils_imsitu.set_trainable(model, True)

    #utils_imsitu.set_trainable(model.classifier, True)
    #utils_imsitu.set_trainable(model.w_emb, True)
    #utils_imsitu.set_trainable(model.q_emb, True)
    # Per-module learning rates: classifier/w_emb at the base 1e-3,
    # q_emb at 5e-4, attention and projection nets at 5e-5.
    optimizer = torch.optim.Adamax([
        {
            'params': model.classifier.parameters()
        },
        {
            'params': model.w_emb.parameters()
        },
        {
            'params': model.q_emb.parameters(),
            'lr': 5e-4
        },
        {
            'params': model.v_att.parameters(),
            'lr': 5e-5
        },
        {
            'params': model.q_net.parameters(),
            'lr': 5e-5
        },
        {
            'params': model.v_net.parameters(),
            'lr': 5e-5
        },
    ], lr=1e-3)

    #utils_imsitu.set_trainable(model, True)
    #optimizer = torch.optim.Adamax(model.parameters(), lr=1e-3)
    #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    #gradient clipping, grad check
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        # Mean of 8 metrics: top-1 verb/value/value-all plus
        # top-5 verb/value/value-all/value*/value-all*.
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils_imsitu.format_dict(top1_avg, '{:.2f}', '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

        #write results to csv file
        role_dict = top1.role_dict
        fail_val_all = top1.value_all_dict
        pass_val_dict = top1.vall_all_correct

        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)

        with open('fail_val_all.json', 'w') as fp:
            json.dump(fail_val_all, fp, indent=4)

        with open('pass_val_all.json', 'w') as fp:
            json.dump(pass_val_dict, fp, indent=4)

        print('Writing predictions to file completed !')

    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils_imsitu.format_dict(top1_avg, '{:.2f}', '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, None, optimizer, scheduler,
              n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, None,
              model_name, args.model_saving_name, args)
def create_glove_embedding_init(idx2word, glove_file):
    """Build an embedding init matrix from a GloVe text file.

    Args:
        idx2word: sequence of vocabulary words; row i of the returned
            matrix corresponds to idx2word[i].
        glove_file: path to a GloVe file ("word v1 v2 ... vD" per line).

    Returns:
        (weights, word2emb) where weights is a float32 array of shape
        (len(idx2word), emb_dim) — rows for words absent from GloVe stay
        zero — and word2emb maps every GloVe word to its vector.
    """
    word2emb = {}
    # GloVe distributions are UTF-8; an explicit encoding avoids
    # locale-dependent UnicodeDecodeError on some platforms.
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    # Embedding dimensionality is inferred from the first line.
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        word2emb[word] = np.array([float(v) for v in vals[1:]])
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    # Build and persist the caption dictionary, then dump GloVe-initialized
    # embedding weights for it.
    d = create_dictionary('data')
    d.dump_to_file('data/caption_dictionary.pkl')

    d = Dictionary.load_from_file('data/caption_dictionary.pkl')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_caption_init_%dd.npy' % emb_dim, weights)