def load_model(opts, device):
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = (
        "v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')
    return model, model_opts
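# A minimal usage sketch for load_model, added for illustration; it is not
# part of the original script. It assumes `opts` carries exactly the fields
# the function reads above (output_dir, checkpoint, fp16). The
# SimpleNamespace stand-in and the '/path/to/...' values are placeholders.
def _example_load_model_usage():
    from types import SimpleNamespace
    opts = SimpleNamespace(
        output_dir='/path/to/exp',
        checkpoint='/path/to/exp/ckpt/model_step_5000.pt',
        fp16=False)
    device = torch.device('cuda')
    model, model_opts = load_model(opts, device)
    model.eval()  # inference only
    return model, model_opts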
def main(opts):
    hvd.init()
    device = torch.device("cuda")  # support single GPU only
    train_opts = Struct(json.load(open(f'{opts.train_dir}/log/hps.json')))

    if 'paired' in train_opts.model:
        EvalDatasetCls = Nlvr2PairedEvalDataset
        eval_collate_fn = nlvr2_paired_eval_collate
        if train_opts.model == 'paired':
            ModelCls = UniterForNlvr2Paired
        elif train_opts.model == 'paired-attn':
            ModelCls = UniterForNlvr2PairedAttn
        else:
            raise ValueError('unrecognized model type')
    elif train_opts.model == 'triplet':
        EvalDatasetCls = Nlvr2TripletEvalDataset
        ModelCls = UniterForNlvr2Triplet
        eval_collate_fn = nlvr2_triplet_eval_collate
    else:
        raise ValueError('unrecognized model type')

    img_db = DetectFeatLmdb(opts.img_db, train_opts.conf_th,
                            train_opts.max_bb, train_opts.min_bb,
                            train_opts.num_bb, opts.compressed_db)
    txt_db = TxtTokLmdb(opts.txt_db, -1)
    dset = EvalDatasetCls(txt_db, img_db, train_opts.use_img_type)
    batch_size = (train_opts.val_batch_size
                  if opts.batch_size is None else opts.batch_size)
    sampler = TokenBucketSampler(dset.lens, bucket_size=BUCKET_SIZE,
                                 batch_size=batch_size, droplast=False)
    eval_dataloader = DataLoader(dset, batch_sampler=sampler,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=eval_collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    # Prepare model
    ckpt_file = f'{opts.train_dir}/ckpt/model_step_{opts.ckpt}.pt'
    checkpoint = torch.load(ckpt_file)
    model_config = UniterConfig.from_json_file(
        f'{opts.train_dir}/log/model.json')
    model = ModelCls(model_config, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.load_state_dict(checkpoint, strict=False)
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    results = evaluate(model, eval_dataloader, device)

    # write results
    if not exists(opts.output_dir):
        os.makedirs(opts.output_dir)
    with open(f'{opts.output_dir}/results.csv', 'w') as f:
        for id_, ans in results:
            f.write(f'{id_},{ans}\n')
    print('all results written')
def main(opts):
    hvd.init()
    # download the tokenizer on rank 0 first; the other ranks block on the
    # collective, then load from the shared cache
    if hvd.rank() == 0:
        toker = RobertaTokenizer.from_pretrained('roberta-base')
        all_gather_list(None)
    else:
        all_gather_list(None)
        toker = RobertaTokenizer.from_pretrained('roberta-base')

    model_opts = Struct(json.load(open(f"{opts.model_dir}/log/hps.json")))
    model_config = f"{opts.model_dir}/log/model_config.json"

    video_db = load_video_sub_dataset(model_opts.vfeat_db,
                                      model_opts.sub_txt_db,
                                      model_opts.vfeat_interval,
                                      model_opts)
    dset = TvcEvalDataset(video_db, opts.target_clip)
    loader = build_dataloader(dset, opts.batch_size,
                              TvcEvalDataset.collate, False, opts)

    checkpoint = torch.load(f"{opts.model_dir}/ckpt/"
                            f"model_step_{opts.ckpt_step}.pt")
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings"
                                ".position_embeddings.weight")
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForTvc.from_pretrained(model_config,
                                       state_dict=checkpoint,
                                       vfeat_dim=VFEAT_DIM,
                                       max_frm_seq_len=max_frm_seq_len,
                                       lsr=model_opts.lsr)
    model.cuda()
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    bos = toker.convert_tokens_to_ids(['<s>'])[0]
    eos = toker.convert_tokens_to_ids(['</s>'])[0]
    model.eval()
    generator = TvcGenerator(model, opts.max_gen_step, bos, eos, opts.fp16)
    results = decode(loader, generator, toker)
    save_jsonl(results, opts.output)

    # evaluate score if possible
    if (hvd.rank() == 0
            and 'descs' in json.loads(next(iter(open(opts.target_clip))))):
        evaluator = TVCEval(opts.target_clip)
        score = evaluator(results)
        print(score)
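# The rank-0/other-ranks branch at the top of main() above is a download
# barrier: rank 0 fetches the tokenizer (populating the shared cache) while
# every other rank waits on the collective, then loads from cache. A hedged,
# generic sketch of the same pattern; the helper name is ours, and it
# assumes all_gather_list is a collective over all ranks (as it is used in
# these scripts), so reaching it synchronizes everyone.
def _rank0_first(load_fn):
    """Run load_fn on rank 0 before any other rank calls it."""
    if hvd.rank() == 0:
        result = load_fn()       # e.g. downloads to a shared cache
        all_gather_list(None)    # release the other ranks
    else:
        all_gather_list(None)    # wait for rank 0 to finish
        result = load_fn()       # now served from the cache
    return result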
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb,
                                 opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero-shot setting
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
def build_dataloader(opts):
    # Load ground truth, query db and video db
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db, (opts.split, opts.query_txt_db)
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    eval_dataset = VcmrFullEvalDataset(
        video_ids, video_db, q_txt_db,
        distributed=model_opts.distributed_eval)
    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)
    return eval_dataloader
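# Usage sketch for build_dataloader, added for illustration only. The opts
# fields mirror exactly what the function reads above (output_dir,
# query_txt_db, vfeat_db, sub_txt_db, split, batch_size, n_workers,
# pin_mem); all values are placeholders, not real paths.
def _example_build_dataloader():
    from types import SimpleNamespace
    opts = SimpleNamespace(output_dir='/path/to/exp',
                           query_txt_db='/path/to/query_db/val',
                           vfeat_db='/path/to/vfeat_db',
                           sub_txt_db='/path/to/sub_db',
                           split='val', batch_size=8,
                           n_workers=4, pin_mem=True)
    loader = build_dataloader(opts)
    for batch in loader:
        pass  # run inference on each prefetched batch
    return loader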
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero-shot setting
    # model.to(device)
    # model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    # load DBs and image dirs
    # npz_path = os.listdir('/root/output_meme_butd')
    # npz_path = ['/root/output_meme_butd/' + i for i in npz_path]
    json_files = open("/root/meme/train.json", "r")
    json_files = json_files.read().split('\n')
    # skip blank lines: a trailing newline would otherwise break json.loads
    json_files = [json.loads(i) for i in json_files if i.strip()]
    LOGGER.info('loaded {} entries'.format(len(json_files)))

    eval_log, results = evaluate(model, json_files)
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb,
                                 opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero-shot setting
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
    if hvd.rank() == 0:
        if not exists(opts.output_dir):
            os.makedirs(opts.output_dir)
        with open(f'{opts.output_dir}/config.json', 'w') as f:
            json.dump(vars(opts), f)
        with open(f'{opts.output_dir}/results.bin', 'wb') as f:
            pickle.dump(results, f)
        with open(f'{opts.output_dir}/scores.json', 'w') as f:
            json.dump(eval_log, f)
        LOGGER.info('evaluation finished')
        LOGGER.info(
            f"======================== Results =========================\n"
            f"image retrieval R1: {eval_log['img_r1']*100:.2f},\n"
            f"image retrieval R5: {eval_log['img_r5']*100:.2f},\n"
            f"image retrieval R10: {eval_log['img_r10']*100:.2f}\n"
            f"text retrieval R1: {eval_log['txt_r1']*100:.2f},\n"
            f"text retrieval R5: {eval_log['txt_r5']*100:.2f},\n"
            f"text retrieval R10: {eval_log['txt_r10']*100:.2f}")
        LOGGER.info("========================================================")
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    hps_file = f"{opts.output_dir}/log/hps.json"
    model_opts = Struct(json.load(open(hps_file)))

    ans2label_file = f"{opts.output_dir}/ckpt/ans2label.json"
    ans2label = json.load(open(ans2label_file))
    label2ans = {label: ans for ans, label in ans2label.items()}

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, model_opts.conf_th,
                                 model_opts.max_bb, model_opts.min_bb,
                                 model_opts.num_bb, opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VqaEvalDataset(len(ans2label), eval_txt_db, eval_img_db)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f"{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt"
    checkpoint = torch.load(ckpt_file)
    model = UniterForVisualQuestionAnswering.from_pretrained(
        f"{opts.output_dir}/log/model.json", checkpoint,
        img_dim=IMG_DIM, num_answer=len(ans2label))
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level="O2")

    sampler = TokenBucketSampler(eval_dataset.lens, bucket_size=BUCKET_SIZE,
                                 batch_size=opts.batch_size, droplast=False)
    eval_dataloader = DataLoader(eval_dataset, batch_sampler=sampler,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vqa_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    val_log, results, logits = evaluate(model, eval_dataloader, label2ans,
                                        opts.save_logits)
    result_dir = f"{opts.output_dir}/results_test"
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = list(concat(all_gather_list(results)))
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        with open(f"{result_dir}/results_{opts.checkpoint}_all.json",
                  "w") as f:
            json.dump(all_results, f)
        if opts.save_logits:
            np.savez(f"{result_dir}/logits_{opts.checkpoint}_all.npz",
                     **all_logits)
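# The result-joining idiom above — list(concat(all_gather_list(results)))
# for per-rank lists, dict.update over all_gather_list for per-rank dicts —
# flattens every rank's partial output into one container. A hedged,
# standalone sketch of both variants; the helper name is ours, and it only
# assumes all_gather_list returns one Python object per rank, as in the
# scripts above.
def _merge_gathered(results):
    gathered = all_gather_list(results)  # one entry per rank
    if isinstance(results, dict):
        merged = {}
        for part in gathered:
            merged.update(part)  # assumes per-rank keys are disjoint
    else:
        merged = [item for part in gathered for item in part]
    return merged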
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QaQueryTokLmdb(opts.query_txt_db, -1)
    eval_dataset = ViolinEvalDataset(video_ids, video_db, q_txt_db,
                                     sampled_by_q=model_opts.sampled_by_q)
    collate_fn = violin_eval_collate

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings"
                                ".position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForViolin.from_pretrained(model_config,
                                          state_dict=checkpoint,
                                          vfeat_dim=VFEAT_DIM,
                                          max_frm_seq_len=max_frm_seq_len)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results, logits = validate_violin(model, eval_dataloader,
                                         opts.split, opts.save_logits)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if opts.save_logits:
        result_dir += '_w_logit'
    if not exists(result_dir) and hvd.rank() == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
        if opts.save_logits:
            save_pickle(all_logits,
                        f'{result_dir}/logits_{opts.checkpoint}_all.pkl')
            LOGGER.info('All logits written......')
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = json.load(open(hps_file))
    if 'mlp' not in model_opts:
        model_opts['mlp'] = 1
    model_opts = Struct(model_opts)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_epoch_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    model = UniterForReferringExpressionComprehension.from_pretrained(
        f'{opts.output_dir}/log/model.json', checkpoint,
        img_dim=IMG_DIM, mlp=model_opts.mlp)
    model.to(device)
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    # load DBs and image dirs
    img_db_type = "gt" if "coco_gt" in opts.img_db else "det"
    conf_th = -1 if img_db_type == "gt" else model_opts.conf_th
    num_bb = 100 if img_db_type == "gt" else model_opts.num_bb
    eval_img_db = DetectFeatLmdb(opts.img_db, conf_th, model_opts.max_bb,
                                 model_opts.min_bb, num_bb,
                                 opts.compressed_db)

    # Prepro txt_dbs
    txt_dbs = opts.txt_db.split(':')
    for txt_db in txt_dbs:
        print(f'Evaluating {txt_db}')
        eval_txt_db = ReTxtTokLmdb(txt_db, -1)
        eval_dataset = ReEvalDataset(eval_txt_db, eval_img_db,
                                     use_gt_feat=img_db_type == "gt")
        sampler = DistributedSampler(eval_dataset, num_replicas=n_gpu,
                                     rank=rank, shuffle=False)
        eval_dataloader = DataLoader(eval_dataset, sampler=sampler,
                                     batch_size=opts.batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=re_eval_collate)
        eval_dataloader = PrefetchLoader(eval_dataloader)

        # evaluate
        val_log, results = evaluate(model, eval_dataloader)

        result_dir = f'{opts.output_dir}/results_test'
        if not exists(result_dir) and rank == 0:
            os.makedirs(result_dir)

        write_to_tmp(
            f"{txt_db.split('_')[1].split('.')[0]}-acc({img_db_type}): "
            f"{results['acc']*100:.2f}% ",
            args.tmp_file)

        all_results = list(concat(all_gather_list(results)))
        if hvd.rank() == 0:
            db_split = txt_db.split('/')[-1].split('.')[0]  # e.g. refcoco+_val
            img_dir = opts.img_db.split('/')[-1]  # e.g. re_coco_gt
            with open(f'{result_dir}/results_{opts.checkpoint}_'
                      f'{db_split}_on_{img_dir}_all.json', 'w') as f:
                json.dump(all_results, f)
        print(f'{opts.output_dir}/results_test')

    write_to_tmp('\n', args.tmp_file)
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if rank != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))

    assert opts.split in opts.img_db and opts.split in opts.txt_db
    # load DBs and image dirs
    eval_img_db, eval_img_db_gt = load_img_feat(opts.img_db, model_opts)
    eval_txt_db = VcrTxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VcrEvalDataset("test", eval_txt_db, img_db=eval_img_db,
                                  img_db_gt=eval_img_db_gt)

    # Prepare model
    model = UniterForVisualCommonsenseReasoning.from_pretrained(
        f'{opts.output_dir}/log/model.json', state_dict={}, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.init_word_embedding(NUM_SPECIAL_TOKENS)
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    state_dict = checkpoint.get('model_state', checkpoint)

    # keep only checkpoint entries that match a model parameter; report the
    # rest as unexpected, and whatever stays unfilled as missing
    matched_state_dict = {}
    unexpected_keys = set()
    missing_keys = set()
    for name, param in model.named_parameters():
        missing_keys.add(name)
    for key, data in state_dict.items():
        if key in missing_keys:
            matched_state_dict[key] = data
            missing_keys.remove(key)
        else:
            unexpected_keys.add(key)
    LOGGER.info(f"Unexpected_keys: {list(unexpected_keys)}")
    LOGGER.info(f"Missing_keys: {list(missing_keys)}")
    model.load_state_dict(matched_state_dict, strict=False)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem, shuffle=False,
                                 collate_fn=vcr_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = evaluate(model, eval_dataloader)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if hvd.rank() == 0:
        with open(f'{result_dir}/results_{opts.checkpoint}_all.json',
                  'w') as f:
            json.dump(all_results, f)
        probs_df = save_for_submission(
            f'{result_dir}/results_{opts.checkpoint}_all.json')
        probs_df.to_csv(f'{result_dir}/results_{opts.checkpoint}_all.csv')
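# The key-matching loop above implements partial checkpoint loading: only
# tensors whose names match a model parameter are loaded, everything else
# is logged. A reusable sketch of the same technique; the helper name is
# ours, not from the original code.
def _load_matched(model, state_dict):
    param_names = {n for n, _ in model.named_parameters()}
    matched = {k: v for k, v in state_dict.items() if k in param_names}
    unexpected = sorted(set(state_dict) - param_names)
    missing = sorted(param_names - set(matched))
    LOGGER.info(f"Unexpected_keys: {unexpected}")
    LOGGER.info(f"Missing_keys: {missing}")
    # strict=False tolerates the reported missing keys (e.g. buffers or
    # freshly initialized heads)
    model.load_state_dict(matched, strict=False)
    return model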
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          model_opts.vfeat_interval,
                                          model_opts)
    else:
        txt_meta = load_json(os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           model_opts.vfeat_interval,
                                           model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    eval_dataset = inf_dataset(video_ids, video_db, q_txt_db,
                               distributed=model_opts.distributed_eval)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = (
        "v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vcmr(model, eval_dataloader, opts.split,
                                    opts, model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results_list = all_gather_list(results)
    if hvd.rank() == 0:
        # join per-rank results; "video2idx" is shared, so save it only once
        all_results = {"video2idx": all_results_list[0]["video2idx"]}
        for rank_id in range(hvd.size()):
            for key, val in all_results_list[rank_id].items():
                if key == "video2idx":
                    continue
                if key not in all_results:
                    all_results[key] = []
                all_results[key].extend(val)
        LOGGER.info('All results joined......')
        # save_vr(all_results,
        #         f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vr.json')
        # save_vcmr_base_on_vr(
        #     all_results,
        #     f'{result_dir}/results_{opts.checkpoint}_{opts.split}'
        #     '_vcmr_base_on_vr.json')
        save_vcmr(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_{opts.split}'
                  '_vcmr.json')
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if rank != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))

    assert opts.split in opts.img_db and opts.split in opts.txt_db
    # load DBs and image dirs
    eval_img_db, eval_img_db_gt = load_img_feat(opts.img_db, model_opts)
    eval_txt_db = VcrTxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VcrEvalDataset("val", eval_txt_db, img_db=eval_img_db,
                                  img_db_gt=eval_img_db_gt)

    # Prepare model
    model = UniterForVisualCommonsenseReasoning.from_pretrained(
        f'{opts.output_dir}/log/model.json', state_dict={}, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.init_type_embedding_know()
    model.init_word_embedding(NUM_SPECIAL_TOKENS)
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    state_dict = checkpoint.get('model_state', checkpoint)

    # keep only checkpoint entries that match a model parameter (same
    # partial-loading technique as above)
    matched_state_dict = {}
    unexpected_keys = set()
    missing_keys = set()
    for name, param in model.named_parameters():
        missing_keys.add(name)
    for key, data in state_dict.items():
        if key in missing_keys:
            matched_state_dict[key] = data
            missing_keys.remove(key)
        else:
            unexpected_keys.add(key)
    LOGGER.info(f"Unexpected_keys: {list(unexpected_keys)}")
    LOGGER.info(f"Missing_keys: {list(missing_keys)}")
    model.load_state_dict(matched_state_dict, strict=False)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem, shuffle=False,
                                 collate_fn=vcr_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    results = evaluate(model, eval_dataloader)

    # write one JSON object per line
    output = '/src/vlkaf.json'
    with open(output, 'w') as f:
        for item in results:
            f.write(json.dumps(item) + '\n')