Example #1
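# Rebuild a HeroForVcmr model and its training options from a finished
# run's saved hyper-parameters and checkpoint.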
def load_model(opts, device):
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'
    # Prepare model
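    # opts.checkpoint can be a checkpoint file path or a training step number.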
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = (
        "v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')
    return model, model_opts
Example #2
def main(opts):
    hvd.init()
    device = torch.device("cuda")  # support single GPU only
    train_opts = Struct(json.load(open(f'{opts.train_dir}/log/hps.json')))

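    # Select dataset, collate function and model class from the trained variant.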
    if 'paired' in train_opts.model:
        EvalDatasetCls = Nlvr2PairedEvalDataset
        eval_collate_fn = nlvr2_paired_eval_collate
        if train_opts.model == 'paired':
            ModelCls = UniterForNlvr2Paired
        elif train_opts.model == 'paired-attn':
            ModelCls = UniterForNlvr2PairedAttn
        else:
            raise ValueError('unrecognized model type')
    elif train_opts.model == 'triplet':
        EvalDatasetCls = Nlvr2TripletEvalDataset
        ModelCls = UniterForNlvr2Triplet
        eval_collate_fn = nlvr2_triplet_eval_collate
    else:
        raise ValueError('unrecognized model type')

    img_db = DetectFeatLmdb(opts.img_db, train_opts.conf_th, train_opts.max_bb,
                            train_opts.min_bb, train_opts.num_bb,
                            opts.compressed_db)
    txt_db = TxtTokLmdb(opts.txt_db, -1)
    dset = EvalDatasetCls(txt_db, img_db, train_opts.use_img_type)
    batch_size = (train_opts.val_batch_size
                  if opts.batch_size is None else opts.batch_size)
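    # TokenBucketSampler buckets similar-length examples to reduce padding.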
    sampler = TokenBucketSampler(dset.lens,
                                 bucket_size=BUCKET_SIZE,
                                 batch_size=batch_size,
                                 droplast=False)
    eval_dataloader = DataLoader(dset,
                                 batch_sampler=sampler,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=eval_collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    # Prepare model
    ckpt_file = f'{opts.train_dir}/ckpt/model_step_{opts.ckpt}.pt'
    checkpoint = torch.load(ckpt_file)
    model_config = UniterConfig.from_json_file(
        f'{opts.train_dir}/log/model.json')
    model = ModelCls(model_config, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.load_state_dict(checkpoint, strict=False)
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    results = evaluate(model, eval_dataloader, device)
    # write results
    if not exists(opts.output_dir):
        os.makedirs(opts.output_dir)
    with open(f'{opts.output_dir}/results.csv', 'w') as f:
        for id_, ans in results:
            f.write(f'{id_},{ans}\n')
    print('all results written')
Example #3
def main(opts):
    hvd.init()
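    # Rank 0 downloads the tokenizer first; all_gather_list(None) acts as a
    # barrier so the other ranks read from the local cache afterwards.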
    if hvd.rank() == 0:
        toker = RobertaTokenizer.from_pretrained('roberta-base')
        all_gather_list(None)
    else:
        all_gather_list(None)
        toker = RobertaTokenizer.from_pretrained('roberta-base')

    model_opts = Struct(json.load(open(f"{opts.model_dir}/log/hps.json")))
    model_config = f"{opts.model_dir}/log/model_config.json"

    video_db = load_video_sub_dataset(model_opts.vfeat_db,
                                      model_opts.sub_txt_db,
                                      model_opts.vfeat_interval,
                                      model_opts)
    dset = TvcEvalDataset(video_db, opts.target_clip)
    loader = build_dataloader(dset, opts.batch_size,
                              TvcEvalDataset.collate, False, opts)

    checkpoint = torch.load(f"{opts.model_dir}/ckpt/"
                            f"model_step_{opts.ckpt_step}.pt")

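    # Infer the maximum frame sequence length from the checkpoint when available.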
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForTvc.from_pretrained(model_config,
                                       state_dict=checkpoint,
                                       vfeat_dim=VFEAT_DIM,
                                       max_frm_seq_len=max_frm_seq_len,
                                       lsr=model_opts.lsr)
    model.cuda()
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    bos = toker.convert_tokens_to_ids(['<s>'])[0]
    eos = toker.convert_tokens_to_ids(['</s>'])[0]
    model.eval()
    generator = TvcGenerator(model, opts.max_gen_step, bos, eos, opts.fp16)
    results = decode(loader, generator, toker)
    save_jsonl(results, opts.output)

    # evaluate score if possible
    if (hvd.rank() == 0
            and 'descs' in json.loads(next(iter(open(opts.target_clip))))):
        evaluator = TVCEval(opts.target_clip)
        score = evaluator(results)
        print(score)
Example #4
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    print('n_gpu:', n_gpu)
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb, opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting

    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

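    # batch_size=1: ItmEvalDataset already groups opts.batch_size pairs per item.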
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
Example #5
def build_dataloader(opts):
    # Load ground truth, query db and video db
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db, (opts.split, opts.query_txt_db)
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)

    eval_dataset = VcmrFullEvalDataset(video_ids,
                                       video_db,
                                       q_txt_db,
                                       distributed=model_opts.distributed_eval)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
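    # PrefetchLoader overlaps host-to-GPU transfer of the next batch with compute.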
    eval_dataloader = PrefetchLoader(eval_dataloader)
    return eval_dataloader
Example #6
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    print('n_gpu:', n_gpu)
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting

    # model.to(device)
    # model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    # load DBs and image dirs
    # npz_path = os.listdir('/root/output_meme_butd')
    # npz_path = ['/root/output_meme_butd/' + i for i in npz_path]
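    # Read train.json as JSON Lines (one JSON object per line).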
    with open("/root/meme/train.json", "r") as f:
        json_files = [json.loads(line) for line in f if line.strip()]

    LOGGER.info('loaded {} examples'.format(len(json_files)))

    eval_log, results = evaluate(model, json_files)
Example #7
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb, opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting

    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
    if hvd.rank() == 0:
        if not exists(opts.output_dir):
            os.makedirs(opts.output_dir)
        with open(f'{opts.output_dir}/config.json', 'w') as f:
            json.dump(vars(opts), f)
        with open(f'{opts.output_dir}/results.bin', 'wb') as f:
            pickle.dump(results, f)
        with open(f'{opts.output_dir}/scores.json', 'w') as f:
            json.dump(eval_log, f)
        LOGGER.info('evaluation finished')
        LOGGER.info(
            f"======================== Results =========================\n"
            f"image retrieval R1: {eval_log['img_r1']*100:.2f},\n"
            f"image retrieval R5: {eval_log['img_r5']*100:.2f},\n"
            f"image retrieval R10: {eval_log['img_r10']*100:.2f}\n"
            f"text retrieval R1: {eval_log['txt_r1']*100:.2f},\n"
            f"text retrieval R5: {eval_log['txt_r5']*100:.2f},\n"
            f"text retrieval R10: {eval_log['txt_r10']*100:.2f}")
        LOGGER.info("========================================================")
Example #8
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    hps_file = f"{opts.output_dir}/log/hps.json"
    model_opts = Struct(json.load(open(hps_file)))

    ans2label_file = f"{opts.output_dir}/ckpt/ans2label.json"
    ans2label = json.load(open(ans2label_file))
    label2ans = {label: ans for ans, label in ans2label.items()}

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(
        opts.img_db,
        model_opts.conf_th,
        model_opts.max_bb,
        model_opts.min_bb,
        model_opts.num_bb,
        opts.compressed_db,
    )
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VqaEvalDataset(len(ans2label), eval_txt_db, eval_img_db)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f"{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt"
    checkpoint = torch.load(ckpt_file)
    model = UniterForVisualQuestionAnswering.from_pretrained(
        f"{opts.output_dir}/log/model.json",
        checkpoint,
        img_dim=IMG_DIM,
        num_answer=len(ans2label),
    )
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level="O2")

    sampler = TokenBucketSampler(
        eval_dataset.lens,
        bucket_size=BUCKET_SIZE,
        batch_size=opts.batch_size,
        droplast=False,
    )
    eval_dataloader = DataLoader(
        eval_dataset,
        batch_sampler=sampler,
        num_workers=opts.n_workers,
        pin_memory=opts.pin_mem,
        collate_fn=vqa_eval_collate,
    )
    eval_dataloader = PrefetchLoader(eval_dataloader)

    val_log, results, logits = evaluate(model, eval_dataloader, label2ans,
                                        opts.save_logits)
    result_dir = f"{opts.output_dir}/results_test"
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

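    # Gather per-rank results so rank 0 can write a single merged file.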
    all_results = list(concat(all_gather_list(results)))
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        with open(f"{result_dir}/"
                  f"results_{opts.checkpoint}_all.json", "w") as f:
            json.dump(all_results, f)
        if opts.save_logits:
            np.savez(f"{result_dir}/logits_{opts.checkpoint}_all.npz",
                     **all_logits)
Example #9
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QaQueryTokLmdb(opts.query_txt_db, -1)
    eval_dataset = ViolinEvalDataset(video_ids,
                                     video_db,
                                     q_txt_db,
                                     sampled_by_q=model_opts.sampled_by_q)
    collate_fn = violin_eval_collate

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForViolin.from_pretrained(model_config,
                                          state_dict=checkpoint,
                                          vfeat_dim=VFEAT_DIM,
                                          max_frm_seq_len=max_frm_seq_len)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results, logits = validate_violin(model, eval_dataloader, opts.split,
                                         opts.save_logits)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if opts.save_logits:
        result_dir += '_w_logit'
    if not exists(result_dir) and hvd.rank() == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
        if opts.save_logits:
            save_pickle(all_logits,
                        f'{result_dir}/logits_{opts.checkpoint}_all.pkl')
            LOGGER.info('All logits written......')
Example #10
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = json.load(open(hps_file))
    if 'mlp' not in model_opts:
        model_opts['mlp'] = 1
    model_opts = Struct(model_opts)
    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_epoch_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    model = UniterForReferringExpressionComprehension.from_pretrained(
        f'{opts.output_dir}/log/model.json', checkpoint,
        img_dim=IMG_DIM, mlp=model_opts.mlp)
    model.to(device)
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    # load DBs and image dirs
    img_db_type = "gt" if "coco_gt" in opts.img_db else "det"
    conf_th = -1 if img_db_type == "gt" else model_opts.conf_th
    num_bb = 100 if img_db_type == "gt" else model_opts.num_bb
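    # Ground-truth boxes bypass the confidence threshold and use a fixed 100 boxes.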
    eval_img_db = DetectFeatLmdb(opts.img_db,
                                 conf_th, model_opts.max_bb,
                                 model_opts.min_bb, num_bb,
                                 opts.compressed_db)

    # Prepro txt_dbs
    txt_dbs = opts.txt_db.split(':')
    for txt_db in txt_dbs:
        print(f'Evaluating {txt_db}')
        eval_txt_db = ReTxtTokLmdb(txt_db, -1)
        eval_dataset = ReEvalDataset(
            eval_txt_db, eval_img_db, use_gt_feat=img_db_type == "gt")

        sampler = DistributedSampler(eval_dataset, num_replicas=n_gpu,
                                     rank=rank, shuffle=False)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=sampler,
                                     batch_size=opts.batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=re_eval_collate)
        eval_dataloader = PrefetchLoader(eval_dataloader)

        # evaluate
        val_log, results = evaluate(model, eval_dataloader)

        result_dir = f'{opts.output_dir}/results_test'
        if not exists(result_dir) and rank == 0:
            os.makedirs(result_dir)
        write_to_tmp(
            f"{txt_db.split('_')[1].split('.')[0]}-acc({img_db_type}): {results['acc']*100:.2f}% ",
            opts.tmp_file)

        all_results = list(concat(all_gather_list(results)))

        if hvd.rank() == 0:
            db_split = txt_db.split('/')[-1].split('.')[0]  # refcoco+_val
            img_dir = opts.img_db.split('/')[-1]  # re_coco_gt
            with open(f'{result_dir}/results_{opts.checkpoint}'
                      f'_{db_split}_on_{img_dir}_all.json', 'w') as f:
                json.dump(all_results, f)
        print(f'{opts.output_dir}/results_test')

    write_to_tmp('\n', opts.tmp_file)
Example #11
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    if rank != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))

    assert opts.split in opts.img_db and opts.split in opts.txt_db
    # load DBs and image dirs
    eval_img_db, eval_img_db_gt = load_img_feat(opts.img_db, model_opts)
    eval_txt_db = VcrTxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VcrEvalDataset(
        "test", eval_txt_db, img_db=eval_img_db,
        img_db_gt=eval_img_db_gt)

    # Prepare model
    model = UniterForVisualCommonsenseReasoning.from_pretrained(
        f'{opts.output_dir}/log/model.json', state_dict={},
        img_dim=IMG_DIM)
    model.init_type_embedding()
    model.init_word_embedding(NUM_SPECIAL_TOKENS)
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    state_dict = checkpoint.get('model_state', checkpoint)
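    # Intersect checkpoint keys with the model's parameters so unexpected and
    # missing keys can be logged before the non-strict load below.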
    matched_state_dict = {}
    unexpected_keys = set()
    missing_keys = set()
    for name, param in model.named_parameters():
        missing_keys.add(name)
    for key, data in state_dict.items():
        if key in missing_keys:
            matched_state_dict[key] = data
            missing_keys.remove(key)
        else:
            unexpected_keys.add(key)
    LOGGER.info(f"Unexpected_keys: {list(unexpected_keys)}")
    LOGGER.info(f"Missing_keys: {list(missing_keys)}")
    model.load_state_dict(matched_state_dict, strict=False)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 shuffle=False,
                                 collate_fn=vcr_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = evaluate(model, eval_dataloader)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if hvd.rank() == 0:
        with open(f'{result_dir}/'
                  f'results_{opts.checkpoint}_all.json', 'w') as f:
            json.dump(all_results, f)
        probs_df = save_for_submission(
            f'{result_dir}/results_{opts.checkpoint}_all.json')
        probs_df.to_csv(f'{result_dir}/results_{opts.checkpoint}_all.csv')
Example #12
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, 16-bits training: {}".format(device, n_gpu, hvd.rank(), opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          model_opts.vfeat_interval,
                                          model_opts)
    else:
        txt_meta = load_json(os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           model_opts.vfeat_interval,
                                           model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset

    eval_dataset = inf_dataset(video_ids, video_db, q_txt_db,
                               distributed=model_opts.distributed_eval)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = (
        "v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q, lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vcmr(model, eval_dataloader, opts.split,
                                    opts, model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'

    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results_list = all_gather_list(results)

    if hvd.rank() == 0:  # save for only one time
        all_results = {"video2idx": all_results_list[0]["video2idx"]}
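        # Merge per-rank result lists; video2idx is shared, so keep it once.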
        for rank_id in range(hvd.size()):
            for key, val in all_results_list[rank_id].items():
                if key == "video2idx":
                    continue
                if key not in all_results:
                    all_results[key] = []
                all_results[key].extend(all_results_list[rank_id][key])
        LOGGER.info('All results joined......')

        # save_vr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vr.json')
        # save_vcmr_base_on_vr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vcmr_base_on_vr.json')
        save_vcmr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vcmr.json')
Example #13
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if rank != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))

    assert opts.split in opts.img_db and opts.split in opts.txt_db
    # load DBs and image dirs
    eval_img_db, eval_img_db_gt = load_img_feat(opts.img_db, model_opts)
    eval_txt_db = VcrTxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VcrEvalDataset("val",
                                  eval_txt_db,
                                  img_db=eval_img_db,
                                  img_db_gt=eval_img_db_gt)

    # Prepare model
    model = UniterForVisualCommonsenseReasoning.from_pretrained(
        f'{opts.output_dir}/log/model.json', state_dict={}, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.init_type_embedding_know()
    model.init_word_embedding(NUM_SPECIAL_TOKENS)
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    state_dict = checkpoint.get('model_state', checkpoint)
    matched_state_dict = {}
    unexpected_keys = set()
    missing_keys = set()
    for name, param in model.named_parameters():
        missing_keys.add(name)
    for key, data in state_dict.items():
        if key in missing_keys:
            matched_state_dict[key] = data
            missing_keys.remove(key)
        else:
            unexpected_keys.add(key)
    LOGGER.info(f"Unexpected_keys: {list(unexpected_keys)}")
    LOGGER.info(f"Missing_keys: {list(missing_keys)}")
    model.load_state_dict(matched_state_dict, strict=False)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 shuffle=False,
                                 collate_fn=vcr_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    results = evaluate(model, eval_dataloader)

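    # Write predictions as JSON Lines, one serialized result per line.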
    output = '/src/vlkaf.json'
    with open(output, "w") as f:
        for item in results:
            f.write(json.dumps(item) + '\n')