Example #1
def main_mix_results(pred_path, tef_pred_path, save_path, max_after_nms=100):
    """
    Args:
        pred_path: contains top-200 VCMR predictions
        tef_pred_path: contains top-1000 VCMR predictions
        save_path: path to write the re-ranked predictions
        max_after_nms: int, number of predictions to keep per query
    Returns:
        None, the re-ranked predictions are saved to save_path
    """
    vcmr_res, video2idx = load_saved_res(pred_path)
    tef_vcmr_res, video2idx = load_saved_res(tef_pred_path)

    reranked_vcmr_res = {}
    num_valid = []
    for desc_id, preds in tqdm(vcmr_res.items(),
                               desc="Loop over the predictions"):
        tef_preds = tef_vcmr_res[desc_id]["predictions"]
        pred_moments = set([tuple(e[:3]) for e in preds["predictions"]])
        reranked_moments = [
            e for e in tef_preds if tuple(e[:3]) in pred_moments
        ][:max_after_nms]
        num_valid += [len(reranked_moments)]
        if len(reranked_moments) != max_after_nms:
            # pad to max_after_nms entries by repeating the top ones
            reranked_moments += reranked_moments[:max_after_nms - len(reranked_moments)]
        reranked_vcmr_res[desc_id] = dict(predictions=reranked_moments,
                                          desc_id=desc_id,
                                          desc=preds["desc"])

    print("There are {} moments founded on average".format(np.mean(num_valid)))
    reranked_predictions = dict(VCMR=list(reranked_vcmr_res.values()),
                                video2idx=video2idx)

    save_json(reranked_predictions, save_path)
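All of the examples on this page rely on a small set of JSON I/O helpers (save_json, load_json, save_jsonl, load_jsonl) that are never shown. A minimal sketch of what they plausibly look like, inferred from the call sites above (the save_pretty and sort_keys keyword arguments appear in those calls; the actual implementations in the original codebases may differ):

import json

def save_json(data, filename, save_pretty=False, sort_keys=True):
    """Write `data` to `filename` as JSON, optionally pretty-printed."""
    with open(filename, "w") as f:
        if save_pretty:
            f.write(json.dumps(data, indent=4, sort_keys=sort_keys))
        else:
            json.dump(data, f)

def load_json(filename):
    with open(filename, "r") as f:
        return json.load(f)

def save_jsonl(data, filename):
    """Write an iterable of JSON-serializable items, one object per line."""
    with open(filename, "w") as f:
        f.write("\n".join([json.dumps(e) for e in data]))

def load_jsonl(filename):
    with open(filename, "r") as f:
        return [json.loads(line) for line in f if line.strip()]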
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_path", type=str, required=True)
    parser.add_argument("--dset_name",
                        type=str,
                        default="tvc",
                        choices=["tvc"])
    parser.add_argument("--cache", type=str, default="./cache")
    parser.add_argument("--min_word_count", type=int, default=5)
    parser.add_argument("--raw_glove_path",
                        type=str,
                        help="downloaded glove vectors path")

    opt = parser.parse_args()
    if not os.path.exists(opt.cache):
        os.makedirs(opt.cache)

    # load, merge, clean, split data
    train_datalist = load_jsonl(opt.train_path)
    all_sentences = flat_list_of_lists(
        [[sub_e["desc"] for sub_e in e["descs"]] for e in train_datalist])
    all_sentences = [
        nltk.tokenize.word_tokenize(sen.lower()) for sen in all_sentences
    ]
    word2idx = build_vocab_idx(all_sentences, opt.min_word_count)
    print("[Info] Dumping the processed data to json file", opt.cache)
    save_json(
        word2idx,
        os.path.join(opt.cache, "{}_word2idx.json".format(opt.dset_name)))
    print("[Info] Finish.")

    if opt.raw_glove_path:
        vocab_glove_path = os.path.join(
            opt.cache, "{}_vocab_glove.pt".format(opt.dset_name))
        extract_glove(word2idx, opt.raw_glove_path, vocab_glove_path)
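Example #2 also depends on two helpers that are not reproduced here. Hedged sketches of what flat_list_of_lists and build_vocab_idx likely do; the reserved special tokens and their indices are an assumption:

from collections import Counter

def flat_list_of_lists(l):
    """[[1, 2], [3]] -> [1, 2, 3]"""
    return [item for sublist in l for item in sublist]

def build_vocab_idx(word_insts, min_word_count):
    """Map words occurring at least `min_word_count` times to indices,
    after a few reserved special tokens (assumed)."""
    word2idx = {"<pad>": 0, "<unk>": 1, "<s>": 2, "</s>": 3}
    word_counts = Counter(w for sent in word_insts for w in sent)
    for word, count in word_counts.most_common():
        if count >= min_word_count and word not in word2idx:
            word2idx[word] = len(word2idx)
    return word2idx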
Example #3
def save_training_meta(args):
    # Commented out since rank is not saved to args; the training scripts already guard calls to save_training_meta.
    # if args.rank > 0:
    #    return

    # args is an EasyDict object, treat it the same as a normal dict
    os.makedirs(join(args.output_dir, 'log'), exist_ok=True)
    os.makedirs(join(args.output_dir, 'ckpt'), exist_ok=True)

    # training args
    save_args_path = join(args.output_dir, 'log', 'hps.json')
    save_json(vars(args), save_args_path, save_pretty=True)

    # model args
    model_config = load_json(args.model_config)
    save_model_config_path = join(args.output_dir, 'log', 'model_config.json')
    save_json(model_config, save_model_config_path, save_pretty=True)
    # git info
    try:
        LOGGER.info("Waiting on git info....")
        c = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                           timeout=10,
                           stdout=subprocess.PIPE)
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(["git", "rev-parse", "HEAD"],
                           timeout=10,
                           stdout=subprocess.PIPE)
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(['git', 'status', '--short'],
                                             cwd=git_dir,
                                             universal_newlines=True).strip()
        with open(join(args.output_dir, 'log', 'git_info.json'),
                  'w') as writer:
            json.dump(
                {
                    'branch': git_branch_name,
                    'is_dirty': bool(git_status),
                    'status': git_status,
                    'sha': git_sha
                },
                writer,
                indent=4)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e:
        LOGGER.exception(e)
        LOGGER.warn("Git info not found. Saving code into zip instead...")
        # save a copy of the codebase.
        # !!!Do not store heavy file in your codebase when using it.
        code_dir = dirname(dirname(realpath(__file__)))
        code_zip_filename = os.path.join(args.output_dir, "code.zip")
        LOGGER.info(f"Saving code from {code_dir} to {code_zip_filename}...")
        make_zipfile(code_dir,
                     code_zip_filename,
                     enclosing_dir="code",
                     exclude_dirs_substring="results",
                     exclude_dirs=["results", "debug_results", "__pycache__"],
                     exclude_extensions=[".pyc", ".ipynb", ".swap"])
        LOGGER.info("Saving code done.")
Example #4
    def save_args(self, opt):
        args = vars(opt)
        # Save settings
        if not isinstance(self, TestOptions):
            option_file_path = os.path.join(
                opt.results_dir,
                self.saved_option_filename)  # not a yaml file, despite the name
            save_json(args, option_file_path, save_pretty=True)
Example #5
    def display_save(self, opt):
        args = vars(opt)
        # Display settings
        print("------------ Options -------------\n{}\n-------------------"
              .format({str(k): str(v) for k, v in sorted(args.items())}))

        # Save settings
        if not isinstance(self, TestOptions):
            option_file_path = os.path.join(opt.results_dir, self.saved_option_filename)  # not a yaml file, despite the name
            save_json(args, option_file_path, save_pretty=True)
Example #6
def combine(video_name_split_path, video_duration_path, save_path):
    video_name_split = load_json(video_name_split_path)
    video_duration_dict = load_json(video_duration_path)

    combined_dict = {}
    for split_name, split_video_names in video_name_split.items():
        combined_dict[split_name] = {
            vid_name: video_duration_dict[vid_name]
            for vid_name in split_video_names
        }
    save_json(combined_dict, save_path)
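A hypothetical call to combine, assuming the split file maps split names to lists of video names and the duration file maps video names to durations in seconds (the file names and values below are made up for illustration):

# video_name_split.json: {"train": ["vid_a", "vid_b"], "val": ["vid_c"]}
# video_duration.json:   {"vid_a": 61.2, "vid_b": 90.0, "vid_c": 45.5}
combine("video_name_split.json", "video_duration.json",
        "video_duration_by_split.json")
# video_duration_by_split.json:
# {"train": {"vid_a": 61.2, "vid_b": 90.0}, "val": {"vid_c": 45.5}}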
Example #7
def main_compute_upper_bound():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-dset_name", type=str, choices=["tvr"])
    parser.add_argument(
        "-eval_file_path",
        type=str,
        help="path to the file containing data to be evaluated")
    parser.add_argument("-save_path",
                        type=str,
                        help="path to save the results")
    parser.add_argument("-verbose", action="store_true")
    args = parser.parse_args()

    eval_datalist = load_jsonl(args.eval_file_path)
    video_proposals_list = get_proposals_for_videos(eval_datalist,
                                                    args.dset_name)
    recall_metrics = compute_proposal_recall_upper_bound(video_proposals_list,
                                                         iou_thds=(0.5, 0.7))

    video_proposals_list_by_video = {}
    for p in video_proposals_list:
        if p["vid_name"] in video_proposals_list_by_video:
            continue
        else:
            video_proposals_list_by_video[p["vid_name"]] = p
    video_proposals_list_by_video = list(
        video_proposals_list_by_video.values())
    total_n_clips_in_proposals = \
        np.sum([np.sum(e["proposals"][:, 1] - e["proposals"][:, 0]) for e in video_proposals_list_by_video])

    results = dict(avg_num_proposals=float(
        np.mean([len(e["proposals"]) for e in video_proposals_list_by_video])),
                   total_num_proposals=int(
                       np.sum([
                           len(e["proposals"])
                           for e in video_proposals_list_by_video
                       ])),
                   recall_metrics=recall_metrics,
                   dset_name=args.dset_name,
                   filename=args.eval_file_path,
                   proposal_config=ProposalConfigs[args.dset_name])
    results["avg_clip_per_proposal"] = total_n_clips_in_proposals / results[
        "total_num_proposals"]
    save_json(results, args.save_path, save_pretty=True)
    if args.verbose:
        pprint.pprint(results)
Example #8
def validate(model, val_dataloaders, split, opts, global_step=0):
    model.eval()
    task = opts.task
    loader = val_dataloaders[task]
    LOGGER.info(f"validate on {task} task")
    val_log, results, _ = validate_violin(model,
                                          loader,
                                          split=split,
                                          save_logits=False)
    save_json(
        results, f'{opts.output_dir}/results/'
        f'val_results_{global_step}'
        f'_rank{hvd.rank()}_final.json')
    val_log = {f'{task}_{k}': v for k, v in val_log.items()}
    TB_LOGGER.log_scaler_dict(
        {f'valid_{task}/{k}': v
         for k, v in val_log.items()})
    model.train()
Example #9
def save_vcmr(results, target):  # added by zhixin
    def __format_vcmr_prediction(pred, top_k):
        _v_idx, _st, _ed, _score = zip(*pred)
        # map video index to video id
        _v_id = [vidx2vid[_idx] for _idx in _v_idx]
        # process the scores with a softmax
        _score = torch.tensor(_score).softmax(-1).tolist()
        pred = list(map(list, zip(_v_id, _st, _ed, _score)))[:top_k]  # list of list
        return pred

    k = 200
    vidx2vid = {results["video2idx"][vid]: vid for vid in results["video2idx"]}
    vcmr_result = results["VCMR"]
    vcmr_pred = {}
    for i, item in enumerate(vcmr_result):
        vcmr_pred[item["desc_id"]] = __format_vcmr_prediction(item["predictions"], k)  # [[vid, st, ed, score], ...]

    save_json(vcmr_pred, target)
    LOGGER.info('VCMR results written......')
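A hypothetical usage sketch for save_vcmr, assuming results has the shape produced by the eval_epoch examples on this page (a video2idx map plus a VCMR list of per-query predictions); the video name, desc_id, and scores are invented for illustration:

results = {
    "video2idx": {"castle_s01e01_seg02_clip_00": 0},
    "VCMR": [
        {"desc_id": 90201,
         "predictions": [[0, 12.0, 18.5, 1.7], [0, 30.0, 33.0, 0.4]]},
    ],
}
save_vcmr(results, "vcmr_top200_submission.json")
# saved file: {"90201": [["castle_s01e01_seg02_clip_00", 12.0, 18.5, 0.785...],
#                        ["castle_s01e01_seg02_clip_00", 30.0, 33.0, 0.214...]]}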
Example #10
def eval_epoch(model, eval_dataset, opt, save_submission_filename,
               tasks=("SVMR",), max_after_nms=100):
    """max_after_nms: always set to 100, since the eval script only evaluate top-100"""
    model.eval()
    logger.info("Computing scores")
    # logger.info("Start timing")
    # times = []
    # for _ in range(3):
    #     st_time = time.time()
    #     eval_submission_raw = get_eval_res(model, eval_dataset, opt, tasks, max_after_nms=max_after_nms)
    #     times += [time.time() - st_time]
    # times = torch.FloatTensor(times)

    eval_submission_raw = get_eval_res(model, eval_dataset, opt, tasks, max_after_nms=max_after_nms)

    IOU_THDS = (0.5, 0.7)
    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw, top_n=max_after_nms)
    save_json(eval_submission, submission_path)

    metrics = eval_retrieval(eval_submission, eval_dataset.query_data,
                             iou_thds=IOU_THDS, match_number=not opt.debug, verbose=opt.debug,
                             use_desc_type=opt.dset_name == "tvr")
    # metrics["time_avg"] = float(times.mean())
    # metrics["time_std"] = float(times.std())
    save_metrics_path = submission_path.replace(".json", "_metrics.json")
    save_json(metrics, save_metrics_path, save_pretty=True, sort_keys=False)
    latest_file_paths = [submission_path, save_metrics_path]

    if opt.nms_thd != -1:
        logger.info("Performing nms with nms_thd {}".format(opt.nms_thd))
        eval_submission_after_nms = dict(video2idx=eval_submission_raw["video2idx"])
        for k, nms_func in POST_PROCESSING_MMS_FUNC.items():
            if k in eval_submission_raw:
                eval_submission_after_nms[k] = nms_func(eval_submission_raw[k],
                                                        nms_thd=opt.nms_thd,
                                                        max_before_nms=opt.max_before_nms,
                                                        max_after_nms=max_after_nms)

        logger.info("Saving/Evaluating nms results")
        submission_nms_path = submission_path.replace(".json", "_nms_thd_{}.json".format(opt.nms_thd))
        save_json(eval_submission_after_nms, submission_nms_path)
        metrics_nms = eval_retrieval(eval_submission_after_nms, eval_dataset.query_data,
                                     iou_thds=IOU_THDS, match_number=not opt.debug, verbose=opt.debug)
        save_metrics_nms_path = submission_nms_path.replace(".json", "_metrics.json")
        save_json(metrics_nms, save_metrics_nms_path, save_pretty=True, sort_keys=False)
        latest_file_paths += [submission_nms_path, save_metrics_nms_path]
    else:
        metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
Example #11
def main_convert():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--src_h5_file",
                        type=str,
                        help="subtitle words level feature .h5 file")
    parser.add_argument("--vid_clip_h5_file",
                        type=str,
                        help="video clip level feature .h5 file")
    parser.add_argument("--sub_meta_path",
                        type=str,
                        help="processed subtitle .jsonl path")
    parser.add_argument("--tgt_h5_file",
                        type=str,
                        help=".h5 path to stores the converted data")
    parser.add_argument("--pool_type",
                        type=str,
                        default="max",
                        choices=["max", "avg"],
                        help="how to aggreate frame features")
    parser.add_argument("--clip_length", type=float, default=1.5)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    sub_info_cache_path = args.tgt_h5_file.replace(".h5", "_sub_info.json")
    if not os.path.exists(sub_info_cache_path):
        video2sub_info = load_process_sub_meta(args.sub_meta_path,
                                               clip_length=args.clip_length)
        save_json(video2sub_info, sub_info_cache_path)
    else:
        video2sub_info = load_json(sub_info_cache_path)
    with h5py.File(args.src_h5_file, "r") as src_h5:
        with h5py.File(args.vid_clip_h5_file, "r") as vid_clip_h5:
            with h5py.File(args.tgt_h5_file, "w") as tgt_h5:
                convert_h5(src_h5,
                           vid_clip_h5,
                           tgt_h5,
                           video2sub_info,
                           pool_type=args.pool_type,
                           debug=args.debug)
Example #12
def eval_epoch(model,
               eval_dataset,
               opt,
               save_submission_filename,
               tasks=("SVMR", ),
               max_before_nms=1000,
               max_after_nms=100):
    model.eval()
    logger.info("Computing scores")
    logger.info("Start timing")
    # times = []
    # for _ in range(3):
    #     st_time = time.time()
    eval_res = compute_query2ctx_scores(model, eval_dataset, opt)
    logger.info("Generating predictions from scores")
    eval_submission_raw = dict(video2idx=eval_res["video2idx"])
    eval_submission_raw["VR"] = generate_vr_predictions_from_res(eval_res)
    # times += [time.time() - st_time]
    # times = torch.FloatTensor(times)
    IOU_THDS = (0.5, 0.7)

    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw, top_n=100)
    save_json(eval_submission, submission_path)

    metrics = eval_retrieval(eval_submission,
                             eval_dataset.query_data,
                             iou_thds=IOU_THDS,
                             match_number=not opt.debug,
                             verbose=opt.debug)
    # metrics["time_avg"] = float(times.mean())
    # metrics["time_std"] = float(times.std())
    save_metrics_path = submission_path.replace(".json", "_metrics.json")
    save_json(metrics, save_metrics_path, save_pretty=True, sort_keys=False)
    latest_file_paths = [submission_path, save_metrics_path]

    metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
Example #13
def save_vcmr_base_on_vr(results, target):  # added by zhixin
    k = 4
    vidx2vid = {results["video2idx"][vid]: vid for vid in results["video2idx"]}
    vr_result = {item["desc_id"]: [vidx2vid[s[0]] for s in item["predictions"][:k]] for item in results["VR"]}
    vcmr_result = results["VCMR"]
    vcmr_submission = {}
    for i, item in enumerate(vcmr_result):
        desc_id = item["desc_id"]
        found = False  # whether any top-k VR video shows up in this query's VCMR proposals
        for rank, vcmr_proposal in enumerate(item["predictions"]):
            vidx, st, ed, s = vcmr_proposal
            vid = vidx2vid[vidx]
            if vid in vr_result[desc_id]:
                rank_in_vr = vr_result[desc_id].index(vid)
                vcmr_submission[desc_id] = (rank, rank_in_vr, vid, st, ed)
                found = True
                break
        assert found, "no top-{} VR video found among the VCMR proposals of desc_id {}".format(k, desc_id)

    save_json(vcmr_submission, target)
    LOGGER.info('VCMR (based on VR) results written......')
Example #14
def main(opts):
    if not exists(opts.output):
        os.makedirs(opts.output)
    else:
        raise ValueError('Found existing DB. Please explicitly remove '
                         'for re-processing')
    meta = vars(opts)
    meta['tokenizer'] = opts.toker
    toker = RobertaTokenizer.from_pretrained(opts.toker)
    tokenizer = roberta_tokenize(toker)
    meta['BOS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['EOS'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['SEP'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['CLS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['PAD'] = toker.convert_tokens_to_ids(['<pad>'])[0]
    meta['MASK'] = toker.convert_tokens_to_ids(['<mask>'])[0]
    meta['UNK'] = toker.convert_tokens_to_ids(['<unk>'])[0]
    meta['v_range'] = (toker.convert_tokens_to_ids(['.'])[0],
                       toker.convert_tokens_to_ids(['<|endoftext|>'])[0] + 1)
    save_json(vars(opts), f'{opts.output}/meta.json', save_pretty=True)

    open_db = curry(open_lmdb, opts.output, readonly=False)
    with open_db() as db:
        sub_info_cache_path = f'{opts.output}/sub_info.json'
        try:
            vid2nframe = load_json(opts.vid2nframe)
        except Exception:
            vid2nframe = None
        if not os.path.exists(sub_info_cache_path):
            video2sub_info = load_process_sub_meta(
                opts.annotation, vid2nframe, frame_length=opts.frame_length)
            save_json(video2sub_info, sub_info_cache_path)
        else:
            video2sub_info = load_json(sub_info_cache_path)
        with open(opts.annotation) as ann:
            vid2len, vid2max_frame_sub_len = process_tv_subtitles(
                ann, video2sub_info, db, tokenizer, meta['SEP'])

        save_json(vid2len, f'{opts.output}/vid2len.json')
        save_json(vid2max_frame_sub_len,
                  f'{opts.output}/vid2max_frame_sub_len.json')
Example #15
def main(opts):
    if not exists(opts.output):
        os.makedirs(opts.output)
    else:
        raise ValueError('Found existing DB. Please explicitly remove '
                         'for re-processing')
    meta = vars(opts)
    meta['tokenizer'] = opts.toker
    toker = RobertaTokenizer.from_pretrained(
        opts.toker)
    tokenizer = roberta_tokenize(toker)
    meta['BOS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['EOS'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['SEP'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['CLS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['PAD'] = toker.convert_tokens_to_ids(['<pad>'])[0]
    meta['MASK'] = toker.convert_tokens_to_ids(['<mask>'])[0]
    meta['UNK'] = toker.convert_tokens_to_ids(['<unk>'])[0]
    meta['v_range'] = (toker.convert_tokens_to_ids(['.'])[0],
                       toker.convert_tokens_to_ids(['<|endoftext|>'])[0]+1)
    save_json(vars(opts), f'{opts.output}/meta.json', save_pretty=True)

    open_db = curry(open_lmdb, opts.output, readonly=False)
    with open_db() as db:
        with open(opts.annotation, "r") as ann:
            if opts.task == "tvr":
                id2lens, query2video, query_data = process_tvr(
                    ann, db, tokenizer)
            elif opts.task == "tvqa":
                id2lens, query2video, query_data = process_tvqa(
                    ann, db, tokenizer)
            elif opts.task == "violin":
                id2lens, query2video, query_data = process_violin(
                    ann, db, tokenizer)
            else:
                raise NotImplementedError(
                    f"prepro for {opts.task} not implemented")

    save_json(id2lens, f'{opts.output}/id2len.json')
    save_json(query2video, f'{opts.output}/query2video.json')
    save_jsonl(query_data, f'{opts.output}/query_data.jsonl')
Example #16
def save_vr(results, target):  # added by zhixin
    k = 4
    vidx2vid = {results["video2idx"][vid]: vid for vid in results["video2idx"]}
    vr_submission = {item["desc_id"]: [vidx2vid[s[0]] for s in item["predictions"][:k]] for item in results["VR"]}
    save_json(vr_submission, target)
    LOGGER.info('VR results written......')
Example #17
def main_run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, default="mee", help="which models to simulate")
    parser.add_argument("--cache_dir", type=str, default="baselines/profiling/cache", help="save index/results path")
    parser.add_argument("--n_runs", type=int, default=100, help="number of runs to calc average")
    parser.add_argument("--n_warmup_runs", type=int, default=10, help="number of warmup runs, to init cuda, etc.")
    args = parser.parse_args()

    """
    The numbers are get from the first author of 
    `Temporal Localization of Moments in Video Collections with Natural Language`
    """
    k = 100
    n_query = 100
    n_videos = 1000000
    n_moments_per_video = 170
    hsz = 256
    n_clips_per_video = 20
    n_total_clips_in_moments = 1170946944
    n_moments = 170000000
    max_clips_per_proposal = 14  # assume padding to this number
    avg_clips_per_proposal = 7  # 6.88

    mode = args.mode
    cfg_path = os.path.join(args.cache_dir, "{}_args.json".format(mode))

    n_runs = args.n_runs
    n_warmup_runs = args.n_warmup_runs
    torch.set_grad_enabled(False)
    if mode in ["mee", "mee_torch"]:
        func_args = dict(n_videos=n_videos, d=hsz, n_query=n_query, max_neighbors=k,
                         n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mee_runtime(**func_args)
    elif mode == "xml_vr":
        func_args = dict(n_videos=n_videos*n_clips_per_video, d=hsz, n_query=n_query,
                         max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mee_runtime(**func_args)
    elif mode == "cal":
        # can only use n_query <= 4000, so use 4000. To get 20000, simply x5 the final time.
        n_cal_rerank_videos = 100
        func_args = dict(n_moments=n_cal_rerank_videos*n_moments_per_video,
                         avg_n_clips_per_moment=avg_clips_per_proposal,
                         d=hsz, n_query=n_query, max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_cal_rerank_time(**func_args)
    elif mode == "mcn":
        n_cal_rerank_videos = 100
        func_args = dict(n_moments=n_cal_rerank_videos*n_moments_per_video, d=hsz, n_query=n_query,
                         max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mcn_rerank_time(**func_args)
    elif mode == "xml":
        n_xml_videos = 100
        func_args = dict(n_videos=n_xml_videos, avg_n_clips_per_video=n_clips_per_video,
                         d=hsz, n_query=n_query, max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_xml_rerank_time(**func_args)
    elif mode == "storage":
        func_args = dict(hsz=hsz, n_videos=n_videos, n_clips_per_video=n_clips_per_video,
                         n_moments=n_moments, n_total_clips_in_moments=n_total_clips_in_moments, dtype_size=4)
        storage = get_storage_size(**func_args)
    else:
        raise NotImplementedError

    if mode == "storage":
        func_args["storage"] = storage
    else:
        func_args["n_runs"] = args.n_runs
        func_args["avg_time"] = avg_time
    func_args["mode"] = mode
    print(func_args)
    save_json(func_args, cfg_path, save_pretty=True)
Example #18
def get_args():
    """parse and preprocess cmd line args"""
    parser = argparse.ArgumentParser()
    parser.add_argument("-ctx_mode",
                        type=str,
                        default="video_sub",
                        choices=["video", "sub", "video_sub"])

    # model config
    parser.add_argument("-hidden_size", type=int, default=768)
    parser.add_argument("-intermediate_size", type=int, default=768)
    parser.add_argument("-word_vec_size", type=int, default=300)
    parser.add_argument("-vid_feat_size",
                        type=int,
                        default=3072,
                        help="2048 appearance + 1024 flow")
    parser.add_argument("-max_v_len",
                        type=int,
                        default=20,
                        help="max length of video feature")
    parser.add_argument("-max_sub_len",
                        type=int,
                        default=50,
                        help="max number of words in subtitle")
    parser.add_argument("-max_cap_len",
                        type=int,
                        default=20,
                        help="max length of caption")
    parser.add_argument("-type_vocab_size",
                        type=int,
                        default=2,
                        help="video as 0, text as 1")
    parser.add_argument("-layer_norm_eps", type=float, default=1e-12)
    parser.add_argument("-hidden_dropout_prob", type=float, default=0.1)
    parser.add_argument("-num_hidden_layers",
                        type=int,
                        default=2,
                        help="number of transformer layers")
    parser.add_argument("-attention_probs_dropout_prob",
                        type=float,
                        default=0.1)
    parser.add_argument("-num_attention_heads", type=int, default=12)
    parser.add_argument("-initializer_range", type=float, default=0.02)
    parser.add_argument("-glove_path",
                        type=str,
                        default=None,
                        help="extracted GloVe vectors")
    parser.add_argument("-freeze_glove",
                        action="store_true",
                        help="do not train GloVe vectors")
    parser.add_argument(
        "-share_wd_cls_weight",
        action="store_true",
        help="share the weight matrix of the word embedding "
        "with the final classifier")

    # training config -- learning rate
    parser.add_argument("-lr", type=float, default=1e-4)
    parser.add_argument(
        "-lr_warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10% of training.")
    parser.add_argument("-grad_clip",
                        type=float,
                        default=1,
                        help="clip gradient, -1 == disable")

    parser.add_argument("-train_path",
                        type=str,
                        default=None,
                        help="path to the training data")
    parser.add_argument("-eval_path",
                        type=str,
                        default=None,
                        help="path to the eval data")
    parser.add_argument("-data_ratio",
                        type=float,
                        default=1,
                        help="how many train/eval data to use")
    parser.add_argument(
        "-reference_path",
        type=str,
    )
    parser.add_argument("-sub_meta_path",
                        type=str,
                        default=None,
                        help="path to")
    parser.add_argument("-vid_feat_path",
                        type=str,
                        default=None,
                        help="path to video features")
    parser.add_argument(
        "-no_norm_vfeat",
        action="store_true",
        help="do not normalize video features; use this when using the "
        "concatenated i3d_resnet features")
    parser.add_argument("-word2idx_path",
                        type=str,
                        default="./cache/word2idx.json")
    parser.add_argument("-label_smoothing",
                        type=float,
                        default=0.1,
                        help="Use soft target instead of one-hot hard target")
    parser.add_argument("-n_epoch",
                        type=int,
                        default=50,
                        help="Number of training epochs")
    parser.add_argument(
        "-max_es_cnt",
        type=int,
        default=10,
        help="stop if the model is not improving for max_es_cnt max_es_cnt")
    parser.add_argument("-batch_size",
                        type=int,
                        default=128,
                        help="training batch size")
    parser.add_argument("-eval_batch_size",
                        type=int,
                        default=50,
                        help="inference batch size")

    parser.add_argument("-use_beam",
                        action="store_true",
                        help="use beam search, otherwise greedy search")
    parser.add_argument("-beam_size", type=int, default=2, help="beam size")
    parser.add_argument("-n_best",
                        type=int,
                        default=1,
                        help="stop searching when get n_best from beam search")

    # others
    parser.add_argument("-exp_id",
                        type=str,
                        default="res",
                        help="id of the current run")
    parser.add_argument("-res_root_dir",
                        type=str,
                        default="results",
                        help="dir to containing all the results")
    parser.add_argument("-save_model", default="model")
    parser.add_argument(
        "-save_mode",
        type=str,
        choices=["all", "best"],
        default="best",
        help="all: save models at each epoch; best: only save the best model")
    parser.add_argument("-device", type=int, default=0, help="0 cuda, -1 cpu")
    parser.add_argument(
        "-num_workers",
        type=int,
        default=8,
        help="num subprocesses used to load the data, 0: use main process")
    parser.add_argument(
        "-no_core_driver",
        action="store_true",
        help=
        "hdf5 driver, default use `core` (load into RAM), if specified, use `None`"
    )
    parser.add_argument(
        "-no_pin_memory",
        action="store_true",
        help="Don't use pin_memory=True for dataloader. "
        "ref: https://discuss.pytorch.org/t/should-we-set-non-blocking-to-true/38234/4"
    )
    parser.add_argument("-seed", default=2019, type=int)
    parser.add_argument("-debug", action="store_true")

    opt = parser.parse_args()
    # make paths
    if opt.debug:
        opt.res_root_dir = os.path.sep.join(
            opt.res_root_dir.split(os.path.sep)[:-1] + [
                "debug_results",
            ])

    opt.res_dir = os.path.join(
        opt.res_root_dir, "-".join(
            [opt.ctx_mode, opt.exp_id,
             time.strftime("%Y_%m_%d_%H_%M_%S")]))

    if os.path.exists(opt.res_dir):
        raise ValueError("File exists {}".format(opt.res_dir))
    else:
        os.makedirs(opt.res_dir)

    opt.log = os.path.join(opt.res_dir, opt.save_model)
    opt.save_model = os.path.join(opt.res_dir, opt.save_model)

    if opt.share_wd_cls_weight:
        assert opt.word_vec_size == opt.hidden_size, \
            "hidden size has to be the same as word embedding size when " \
            "sharing the word embedding weight and the final classifier weight"

    cfg_name = opt.save_model + ".cfg.json"
    args_dict = vars(opt)
    save_json(args_dict, cfg_name, save_pretty=True)

    opt.h5driver = None if opt.no_core_driver else "core"
    opt.num_workers = 1 if opt.no_core_driver else opt.num_workers
    opt.pin_memory = not opt.no_pin_memory
    opt.device = torch.device("cuda:0" if opt.device >= 0 else "cpu")

    if opt.vid_feat_size > 3000:  # 3072, the normalized concatenation of resnet+i3d
        assert opt.no_norm_vfeat
    return opt
Example #19
def eval_epoch(model,
               eval_dataset,
               opt,
               save_submission_filename,
               tasks=("SVMR", ),
               max_before_nms=1000,
               max_after_nms=100):
    model.eval()
    logger.info("Computing scores")
    logger.info("Start timing")
    # times = []  # do not use
    # for _ in range(3):
    #     st_time = time.time()
    if opt.use_intermediate:
        intermediate_cache_path = os.path.join(
            opt.results_dir, "{}_eval_res.pt".format(opt.eval_split_name))
        if not os.path.exists(intermediate_cache_path):
            logger.info("Saving intermediate results {}.".format(
                intermediate_cache_path))
            eval_res = compute_query_proposal_distance(model,
                                                       eval_dataset,
                                                       opt,
                                                       tasks=tasks)
            torch.save(eval_res, intermediate_cache_path)
        else:
            logger.info("Loading intermediate results {}.".format(
                intermediate_cache_path))
            eval_res = torch.load(intermediate_cache_path)
    else:
        logger.info(
            "Running without saving intermediate results, you might want to turn on --use_intermediate."
        )
        eval_res = compute_query_proposal_distance(model,
                                                   eval_dataset,
                                                   opt,
                                                   tasks=tasks)
    # del model  # we don't need the model anymore

    # eval_res = compute_query_proposal_distance(model, eval_dataset, opt, tasks=tasks)

    logger.info("Generating predictions from scores")
    eval_submission_raw = dict(video2idx=eval_res["video2idx"])
    if "SVMR" in tasks:
        eval_submission_raw["SVMR"] = generate_svmr_predictions_from_res(
            eval_res, max_prop_per_query=max_before_nms)
    # vcmr_loading_time = 0
    if "VCMR" in tasks:
        if opt.external_inference_vr_res_path is not None:
            logger.info("Using external VR results from {}".format(
                opt.external_inference_vr_res_path))
            # vcmr_loading_time = time.time()
            eval_res["external_query2video"] = load_external_vr_res(
                opt.external_inference_vr_res_path, top_n_vr_videos=5)
            # vcmr_loading_time = time.time() - vcmr_loading_time
            vcmr_res, vr_res = generate_vcmr_predictions_from_res_with_external(
                eval_res, max_prop_per_query=max_before_nms)
        else:
            vcmr_res, vr_res = generate_vcmr_predictions_from_res(
                eval_res, max_prop_per_query=max_before_nms)
        eval_submission_raw["VCMR"] = vcmr_res
        eval_submission_raw["VR"] = vr_res
        # times += [time.time() - st_time - vcmr_loading_time]
    # times = torch.FloatTensor(times)
    IOU_THDS = (0.5, 0.7)

    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw,
                                           top_n=max_after_nms)
    if max_after_nms < 1000:
        save_json(eval_submission, submission_path)
    else:
        torch.save(eval_submission, submission_path.replace(".json", ".pt"))

    metrics = eval_retrieval(eval_submission,
                             eval_dataset.query_data,
                             iou_thds=IOU_THDS,
                             match_number=not opt.debug,
                             verbose=opt.debug,
                             use_desc_type=opt.dset_name == "tvr")
    # metrics["time_avg"] = float(times.mean())
    # metrics["time_std"] = float(times.std())
    save_metrics_path = submission_path.replace(".json", "_metrics.json")
    save_json(metrics, save_metrics_path, save_pretty=True, sort_keys=False)
    latest_file_paths = [submission_path, save_metrics_path]

    if opt.nms_thd != -1:
        logger.info("Performing nms with nms_thd {}".format(opt.nms_thd))
        eval_submission_after_nms = dict(
            video2idx=eval_submission_raw["video2idx"])
        for k, nms_func in POST_PROCESSING_MMS_FUNC.items():
            if k in eval_submission_raw:
                eval_submission_after_nms[k] = nms_func(
                    eval_submission_raw[k],
                    nms_thd=opt.nms_thd,
                    max_before_nms=max_before_nms,
                    max_after_nms=max_after_nms)

        logger.info("Saving/Evaluating nms results")
        submission_nms_path = submission_path.replace(
            ".json", "_nms_thd_{}.json".format(opt.nms_thd))
        save_json(eval_submission_after_nms, submission_nms_path)
        metrics_nms = eval_retrieval(eval_submission_after_nms,
                                     eval_dataset.query_data,
                                     iou_thds=IOU_THDS,
                                     match_number=not opt.debug,
                                     verbose=opt.debug)
        save_metrics_nms_path = submission_nms_path.replace(
            ".json", "_metrics.json")
        save_json(metrics_nms,
                  save_metrics_nms_path,
                  save_pretty=True,
                  sort_keys=False)
        latest_file_paths += [submission_nms_path, save_metrics_nms_path]
    else:
        metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
Example #20
                                 query_batch_size=query_batch_size)
        # use the 2nd one to report time
        profile_xml.get_ctx_encoding_time()
        ctx_enc_time = profile_xml.get_ctx_encoding_time()
        query_enc_time = profile_xml.get_query_encoding_time()
    elif model == "excl":
        profile_excl = ProfileExCL(ctx_batch_size=ctx_batch_size,
                                   query_batch_size=ctx_batch_size)
        # use the 2nd one to report time
        profile_excl.get_prediction_time()
        ctx_enc_time = profile_excl.get_prediction_time()
        query_enc_time = 0
        # Calculate the total time as ctx_enc_time * (100 * 1M / ctx_batch_size)
    else:
        raise NotImplementedError
    # ctx_enc_time = ctx_enc_time
    save_path = os.path.join(args.save_dir,
                             "{}_profile_main.json".format(model))

    n_videos = ProfileBase.N_Videos
    res = dict(ctx_enc_time=ctx_enc_time,
               ctx_enc_avg_time_all_videos=ctx_enc_time["avg"] * n_videos /
               ctx_batch_size,
               query_enc_time=query_enc_time,
               n_videos=n_videos,
               ctx_batch_size=ctx_batch_size,
               query_batch_size=query_batch_size,
               model=model)
    save_json(res, save_path, save_pretty=True)
    pprint.pprint(res)
Example #21
def eval_epoch(model,
               eval_dataset,
               opt,
               save_submission_filename,
               tasks=("SVMR", ),
               max_after_nms=100):
    """max_after_nms: always set to 100, since the eval script only evaluate top-100"""
    model.eval()
    logger.info("Computing scores")
    st_time = time.time()
    eval_submission_raw = get_eval_res(model, eval_dataset, opt, tasks)
    total_time = time.time() - st_time
    print("\n" + "\x1b[1;31m" + str(total_time) + "\x1b[0m", flush=True)

    IOU_THDS = (0.5, 0.7)  # (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw,
                                           top_n=max_after_nms)
    save_json(eval_submission, submission_path)

    if opt.eval_split_name == "val":  # since test_public has no GT
        metrics = eval_retrieval(eval_submission,
                                 eval_dataset.query_data,
                                 iou_thds=IOU_THDS,
                                 match_number=not opt.debug,
                                 verbose=opt.debug,
                                 use_desc_type=opt.dset_name == "tvr")
        save_metrics_path = submission_path.replace(".json", "_metrics.json")
        save_json(metrics,
                  save_metrics_path,
                  save_pretty=True,
                  sort_keys=False)
        latest_file_paths = [submission_path, save_metrics_path]
    else:
        metrics = None
        latest_file_paths = [
            submission_path,
        ]

    if opt.nms_thd != -1:
        logger.info("Performing nms with nms_thd {}".format(opt.nms_thd))
        eval_submission_after_nms = dict(
            video2idx=eval_submission_raw["video2idx"])
        for k, nms_func in POST_PROCESSING_MMS_FUNC.items():
            if k in eval_submission_raw:
                eval_submission_after_nms[k] = nms_func(
                    eval_submission_raw[k],
                    nms_thd=opt.nms_thd,
                    max_before_nms=opt.max_before_nms,
                    max_after_nms=max_after_nms)
        logger.info("Saving/Evaluating nms results")
        submission_nms_path = submission_path.replace(
            ".json", "_nms_thd_{}.json".format(opt.nms_thd))
        save_json(eval_submission_after_nms, submission_nms_path)
        if opt.eval_split_name == "val":
            metrics_nms = eval_retrieval(eval_submission_after_nms,
                                         eval_dataset.query_data,
                                         iou_thds=IOU_THDS,
                                         match_number=not opt.debug,
                                         verbose=opt.debug)
            save_metrics_nms_path = submission_nms_path.replace(
                ".json", "_metrics.json")
            save_json(metrics_nms,
                      save_metrics_nms_path,
                      save_pretty=True,
                      sort_keys=False)
            latest_file_paths += [submission_nms_path, save_metrics_nms_path]
        else:
            metrics_nms = None
            latest_file_paths = [
                submission_nms_path,
            ]
    else:
        metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
Example #22
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QaQueryTokLmdb(opts.query_txt_db, -1)
    eval_dataset = ViolinEvalDataset(video_ids,
                                     video_db,
                                     q_txt_db,
                                     sampled_by_q=model_opts.sampled_by_q)
    collate_fn = violin_eval_collate

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForViolin.from_pretrained(model_config,
                                          state_dict=checkpoint,
                                          vfeat_dim=VFEAT_DIM,
                                          max_frm_seq_len=max_frm_seq_len)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results, logits = validate_violin(model, eval_dataloader, opts.split,
                                         opts.save_logits)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if opts.save_logits:
        result_dir += '_w_logit'
    if not exists(result_dir) and hvd.rank() == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
        if opts.save_logits:
            save_pickle(all_logits,
                        f'{result_dir}/logits_{opts.checkpoint}_all.pkl')
            LOGGER.info('All logits written......')
Example #23
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          model_opts.vfeat_interval,
                                          model_opts)
    else:
        txt_meta = load_json(os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           model_opts.vfeat_interval,
                                           model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    eval_dataset = inf_dataset(video_ids,
                               video_db,
                               q_txt_db,
                               distributed=model_opts.distributed_eval)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings" +
                                ".position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vcmr(model, eval_dataloader, opts.split, opts,
                                    model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'

    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = list(concat(all_gather_list(results)))
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
Example #24
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    opts.n_gpu = n_gpu
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if hvd.rank() != 0:
        LOGGER.disabled = True
    set_random_seed(opts.seed)

    # train_examples = None
    LOGGER.info(f"Loading the whole video dataset {opts.sub_txt_db}, "
                f"{opts.vfeat_db}")
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          opts.vfeat_interval, opts)
    else:
        txt_meta = load_json(join(opts.train_query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           opts.vfeat_interval, opts)

    # data loaders
    # train
    video_ids = get_video_ids(opts.train_query_txt_db)
    train_q_txt_db = QueryTokLmdb(opts.train_query_txt_db, opts.max_txt_len)
    train_dataloaders = build_downstream_dataloaders([opts.task],
                                                     video_db,
                                                     video_ids,
                                                     True,
                                                     opts,
                                                     shuffle=True,
                                                     q_txt_db=train_q_txt_db)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # val
    video_ids = get_video_ids(opts.val_query_txt_db)
    val_q_txt_db = QueryTokLmdb(opts.val_query_txt_db, -1)
    val_dataloaders = build_downstream_dataloaders([opts.task],
                                                   video_db,
                                                   video_ids,
                                                   False,
                                                   opts,
                                                   q_txt_db=val_q_txt_db)

    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    LOGGER.info(f"Loading Inference Dataset {opts.val_query_txt_db} (val)")
    val_dset = inf_dataset(video_ids,
                           video_db,
                           val_q_txt_db,
                           distributed=opts.distributed_eval)
    inf_loader_val = DataLoader(val_dset,
                                batch_size=opts.vcmr_eval_q_batch_size,
                                num_workers=opts.n_workers,
                                pin_memory=opts.pin_mem,
                                collate_fn=vcmr_full_eval_collate)
    inf_loader_val = PrefetchLoader(inf_loader_val)
    if opts.test_query_txt_db:
        LOGGER.info(
            f"Loading Inference Dataset {opts.test_query_txt_db} (test)")
        video_ids = get_video_ids(opts.test_query_txt_db)
        test_q_txt_db = QueryTokLmdb(opts.test_query_txt_db, -1)
        test_dset = inf_dataset(video_ids,
                                video_db,
                                test_q_txt_db,
                                distributed=opts.distributed_eval)
        inf_loader_test = DataLoader(test_dset,
                                     batch_size=opts.vcmr_eval_q_batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=vcmr_full_eval_collate)
        inf_loader_test = PrefetchLoader(inf_loader_test)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForVcmr.from_pretrained(
        opts.model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=opts.lw_neg_ctx,
        lw_neg_q=opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=opts.hard_pool_size,
        margin=opts.margin,
        use_all_neg=opts.use_all_neg,
        drop_svmr_prob=opts.drop_svmr_prob)

    model.to(device)
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16,
                                      opt_level='O2')
    restorer = TrainingRestorer(opts, model, optimizer)
    global_step = restorer.global_step
    TB_LOGGER.global_step = global_step
    if hvd.rank() == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        if not exists(join(opts.output_dir, 'results')):
            # store tvr predictions
            os.makedirs(join(opts.output_dir, 'results'))
        if opts.nms_thd != -1:
            # store tvr-nms predictions
            if not exists(join(opts.output_dir, 'results_nms')):
                os.makedirs(join(opts.output_dir, 'results_nms'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        pbar = NoOp()
        model_saver = NoOp()
        restorer = NoOp()

    if global_step > 0:
        pbar.update(global_step)
    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    task2loss = {
        task: RunningMeter(f'loss/{task}')
        for task in train_dataloaders.keys()
    }

    for obj in (f'{opts.task}_st_ed', f'{opts.task}_neg_ctx',
                f'{opts.task}_neg_q'):
        task2loss[obj] = RunningMeter(f'loss/{obj}')
    model.train()
    n_examples = defaultdict(int)
    start = time()
    # quick hack for the apex amp delay_unscale bug: zero the gradients and
    # take one dummy optimizer step before the first real update
    optimizer.zero_grad()
    if global_step == 0:
        optimizer.step()
    for step, (task, batch) in enumerate(meta_loader):
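        # hard-negative curriculum: once global_step passes a configured start
        # step (-1 disables that stage), turn on hard-negative mining with the
        # matching pool size and weight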
        if len(opts.hard_negtiave_start_step) > 0:
            for i, hn_step in enumerate(opts.hard_negtiave_start_step):
                if global_step >= hn_step and hn_step != -1:
                    model.set_hard_negative(True, opts.hard_pool_size[i],
                                            opts.hard_neg_weights[i])
        if opts.train_span_start_step != -1 and\
                global_step >= opts.train_span_start_step:
            model.set_train_st_ed(opts.lw_st_ed)

        n_examples[task] += opts.train_batch_size

        loss = model(batch, task=task, compute_loss=True)
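        # the model returns three loss terms: start/end span prediction plus
        # the two ranking losses (negative context, negative query)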

        loss_st_ed, loss_neg_ctx, loss_neg_q = loss
        loss = loss_st_ed + loss_neg_ctx + loss_neg_q
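        # for logging, divide each term by its loss weight (when non-zero) so
        # the reported values are comparable across weight settings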
        for n, ls, w in (('st_ed', loss_st_ed, opts.lw_st_ed),
                         ('neg_ctx', loss_neg_ctx, opts.lw_neg_ctx),
                         ('neg_q', loss_neg_q, opts.lw_neg_q)):
            ls = ls.item()
            if w:
                ls /= w
            task2loss[f'{task}_{n}'](ls)

        loss = loss.mean()
        task2loss[task](loss.item())

        # delay unscaling until the last micro-batch of the accumulation window
        delay_unscale = (step + 1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss,
                            optimizer,
                            delay_unscale=delay_unscale,
                            loss_id=task2scaler[task]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every process
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [
                    p.grad.data for p in model.parameters()
                    if p.requires_grad and p.grad is not None
                ]
                all_reduce_and_rescale_tensors(grads, float(1))

        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            TB_LOGGER.log_scaler_dict({
                temp_loss.name: temp_loss.val
                for temp_loss in task2loss.values()
                if temp_loss.val is not None
            })
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info('-------------------------------------------')
                LOGGER.info(f'Step {global_step}:')
                for t in train_dataloaders.keys():
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time() - start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)

            if global_step % opts.valid_steps == 0:
                LOGGER.info('===========================================')
                LOGGER.info(f"Step {global_step}: start running validation")
                validate(model, val_dataloaders, opts)
                if hvd.rank() == 0 or opts.distributed_eval:
                    log, results = validate_full_vcmr(model,
                                                      inf_loader_val,
                                                      'val',
                                                      opts,
                                                      model_opts=opts)
                    save_json(
                        results, f'{opts.output_dir}/results/'
                        f'val_results_{global_step}_rank{hvd.rank()}.json')
                    TB_LOGGER.log_scaler_dict(log)
                    if opts.test_query_txt_db:
                        log, results = validate_full_vcmr(model,
                                                          inf_loader_test,
                                                          'test',
                                                          opts,
                                                          model_opts=opts)
                        save_json(
                            results, f'{opts.output_dir}/results/'
                            f'test_results_{global_step}_rank{hvd.rank()}.json'
                        )
                        TB_LOGGER.log_scaler_dict(log)
                LOGGER.info('===========================================')
                model_saver.save(model, global_step)

            # step the restorer at the end so the validation checkpoint is not missed
            restorer.step()
        if global_step >= opts.num_train_steps:
            break

    LOGGER.info('===========================================')
    if global_step % opts.valid_steps != 0:
        if hvd.rank() == 0 or opts.distributed_eval:
            log, results = validate_full_vcmr(model,
                                              inf_loader_val,
                                              'val',
                                              opts,
                                              model_opts=opts)
            save_json(
                results, f'{opts.output_dir}/results/'
                f'val_results_{global_step}'
                f'_rank{hvd.rank()}_final.json')
            TB_LOGGER.log_scaler_dict(log)
            if opts.test_query_txt_db:
                log, results = validate_full_vcmr(model,
                                                  inf_loader_test,
                                                  'test',
                                                  opts,
                                                  model_opts=opts)
                save_json(
                    results, f'{opts.output_dir}/results/'
                    f'test_results_{global_step}_rank{hvd.rank()}.json')
                TB_LOGGER.log_scaler_dict(log)
    model_saver.save(model, f'{global_step}_final')
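The update schedule in the loop above (accumulate gradients over several micro-batches, then set the learning rate, optionally clip, and step once) is easier to see without the apex/Horovod plumbing. Below is a minimal, self-contained sketch of that pattern in plain PyTorch; it is only an illustration, not the implementation above, and the names train_with_accumulation, get_lr and max_grad_norm are hypothetical.

import torch


def train_with_accumulation(model, loader, optimizer, get_lr,
                            accum_steps=2, num_train_steps=1000,
                            max_grad_norm=2.0):
    """Hypothetical sketch of gradient accumulation with a per-step LR schedule.

    Assumes `model(**batch)` returns a scalar loss and `get_lr(step)` is a
    schedule function analogous to get_lr_sched(global_step, opts) above.
    """
    global_step = 0
    model.train()
    optimizer.zero_grad()
    for step, batch in enumerate(loader):
        loss = model(**batch)
        # scale so the accumulated gradient approximates one large batch
        # (the loop above instead relies on amp loss scaling)
        (loss / accum_steps).backward()
        if (step + 1) % accum_steps == 0:
            global_step += 1
            # per-update learning rate, mirroring the get_lr_sched call above
            lr_this_step = get_lr(global_step)
            for group in optimizer.param_groups:
                group["lr"] = lr_this_step
            # gradient clipping, analogous to the opts.grad_norm branch above
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()
            if global_step >= num_train_steps:
                break
    return global_step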
Exemple #25
0
def main():
    parser = argparse.ArgumentParser(description="translate.py")

    parser.add_argument("-eval_split_name", choices=["val", "test_public"])
    parser.add_argument("-eval_path", type=str, help="Path to eval data")
    parser.add_argument("-reference_path",
                        type=str,
                        default=None,
                        help="Path to reference")
    parser.add_argument("-res_dir",
                        required=True,
                        help="path to dir containing model .pt file")
    parser.add_argument("-batch_size",
                        type=int,
                        default=100,
                        help="batch size")

    # beam search configs
    parser.add_argument("-use_beam",
                        action="store_true",
                        help="use beam search, otherwise greedy search")
    parser.add_argument("-beam_size", type=int, default=2, help="beam size")
    parser.add_argument("-n_best",
                        type=int,
                        default=1,
                        help="stop searching when get n_best from beam search")
    parser.add_argument("-min_sen_len",
                        type=int,
                        default=8,
                        help="minimum length of the decoded sentences")
    parser.add_argument("-max_sen_len",
                        type=int,
                        default=25,
                        help="maximum length of the decoded sentences")
    parser.add_argument("-block_ngram_repeat",
                        type=int,
                        default=0,
                        help="block repetition of ngrams during decoding.")
    parser.add_argument("-length_penalty_name",
                        default="none",
                        choices=["none", "wu", "avg"],
                        help="length penalty to use.")
    parser.add_argument(
        "-length_penalty_alpha",
        type=float,
        default=0.,
        help="Google NMT length penalty parameter (higher = longer generation)"
    )

    parser.add_argument("-no_cuda", action="store_true")
    parser.add_argument("-seed", default=2019, type=int)
    parser.add_argument("-debug", action="store_true")

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # random seed
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    checkpoint = torch.load(os.path.join(opt.res_dir, "model.chkpt"))

    decoding_strategy = "beam{}_lp_{}_la_{}".format(
        opt.beam_size, opt.length_penalty_name,
        opt.length_penalty_alpha) if opt.use_beam else "greedy"
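    # the decoding-strategy tag names both the dumped eval config below and
    # the prediction file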
    save_json(vars(opt),
              os.path.join(opt.res_dir,
                           "{}_eval_cfg.json".format(decoding_strategy)),
              save_pretty=True)

    # copy over train-time configs that were not specified at eval time
    train_opt = checkpoint["opt"]
    # EDict(load_json(os.path.join(opt.res_dir, "model.cfg.json")))
    for k in train_opt.__dict__:
        if k not in opt.__dict__:
            setattr(opt, k, getattr(train_opt, k))

    if "ctx_mode" not in opt:
        opt.ctx_mode = "video_sub"  # temp hack, since the first experiment does not have such a setting

    eval_data_loader = get_data_loader(opt)

    # setup model
    translator = Translator(opt, checkpoint)

    pred_file = os.path.join(
        opt.res_dir, "{}_pred_{}.jsonl".format(decoding_strategy,
                                               opt.eval_split_name))
    pred_file = os.path.abspath(pred_file)
    if not os.path.exists(pred_file):
        json_res = run_translate(eval_data_loader, translator, opt=opt)
        save_jsonl(json_res, pred_file)
    else:
        print("Using existing prediction file at {}".format(pred_file))

    if opt.reference_path:
        # COCO language evaluation
        reference_path = os.path.abspath(opt.reference_path)
        metrics_path = pred_file.replace(".json", "_lang_metrics.json")
        eval_cmd = [
            "python", "evaluate.py", "-s", pred_file, "-o", metrics_path, "-r",
            reference_path
        ]
        subprocess.call(eval_cmd, cwd="standalone_eval")

    print("[Info] Finished {}.".format(opt.eval_split_name))