Code Example #1
def get_video_ids(query_txt_db):
    if os.path.exists(f'{query_txt_db}/query2video.json'):
        q2v = load_json(f'{query_txt_db}/query2video.json')
        qids = load_json(f'{query_txt_db}/id2len.json').keys()
        video_ids = list(set([q2v[qid] for qid in qids]))
    else:
        video_ids = load_json(f'{query_txt_db}/video_ids.json')
    return video_ids
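These examples call small JSON I/O helpers (`load_json`, `load_jsonl`, `save_json`, `save_jsonl`) rather than the `json` module directly. The exact implementations live in each project's utility module and are not shown on this page; the sketch below is only an assumption of what such wrappers typically look like, with the `save_pretty` keyword inferred from its use in Example #4.

import json


def load_json(filename):
    with open(filename, "r") as f:
        return json.load(f)


def load_jsonl(filename):
    # list of objects, one JSON object per line
    with open(filename, "r") as f:
        return [json.loads(line) for line in f if line.strip()]


def save_json(data, filename, save_pretty=False):
    # pretty-print with indentation when requested, compact otherwise
    with open(filename, "w") as f:
        if save_pretty:
            json.dump(data, f, indent=4)
        else:
            json.dump(data, f)


def save_jsonl(data, filename):
    # one JSON object per line, as used for the prediction file in Example #5
    with open(filename, "w") as f:
        f.write("\n".join(json.dumps(e) for e in data))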
Code Example #2
def combine(video_name_split_path, video_duration_path, save_path):
    video_name_split = load_json(video_name_split_path)
    video_duration_dict = load_json(video_duration_path)

    combined_dict = {}
    for split_name, split_video_names in video_name_split.items():
        combined_dict[split_name] = {
            vid_name: video_duration_dict[vid_name]
            for vid_name in split_video_names
        }
    save_json(combined_dict, save_path)
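A hypothetical illustration of `combine` (the file names and values below are assumptions, not taken from the project): the per-video values are simply re-grouped under each split name.

# video_name_split.json (assumed): {"train": ["vid_a", "vid_b"], "val": ["vid_c"]}
# video_duration.json (assumed):   {"vid_a": 61.2, "vid_b": 45.0, "vid_c": 90.5}
# combined.json after the call:    {"train": {"vid_a": 61.2, "vid_b": 45.0},
#                                   "val": {"vid_c": 90.5}}
combine("video_name_split.json", "video_duration.json", "combined.json")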
Code Example #3
File: build_vocab.py Project: vyraun/TVCaption
def load_transform_data(data_path):
    data = load_json(data_path)
    transformed_data = []
    for v_id, cap in data.items():
        cap["v_id"] = v_id
        transformed_data.append(cap)
    return transformed_data
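A hypothetical call showing only the shape change (the caption fields are assumptions; only `v_id` comes from the code above):

# data.json (assumed): {"vid_0": {"descs": [...]}, "vid_1": {"descs": [...]}}
# returned list:       [{"descs": [...], "v_id": "vid_0"},
#                       {"descs": [...], "v_id": "vid_1"}]
transformed = load_transform_data("data.json")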
Code Example #4
File: save.py Project: linjieli222/HERO
def save_training_meta(args):
    # Commented out, since rank is not saved to args; the training scripts
    # already guard calls to save_training_meta.
    # if args.rank > 0:
    #    return

    # args is an EasyDict object, treat it the same as a normal dict
    os.makedirs(join(args.output_dir, 'log'), exist_ok=True)
    os.makedirs(join(args.output_dir, 'ckpt'), exist_ok=True)

    # training args
    save_args_path = join(args.output_dir, 'log', 'hps.json')
    save_json(vars(args), save_args_path, save_pretty=True)

    # model args
    model_config = load_json(args.model_config)
    save_model_config_path = join(args.output_dir, 'log', 'model_config.json')
    save_json(model_config, save_model_config_path, save_pretty=True)
    # git info
    try:
        LOGGER.info("Waiting on git info....")
        c = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                           timeout=10,
                           stdout=subprocess.PIPE)
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(["git", "rev-parse", "HEAD"],
                           timeout=10,
                           stdout=subprocess.PIPE)
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(['git', 'status', '--short'],
                                             cwd=git_dir,
                                             universal_newlines=True).strip()
        with open(join(args.output_dir, 'log', 'git_info.json'),
                  'w') as writer:
            json.dump(
                {
                    'branch': git_branch_name,
                    'is_dirty': bool(git_status),
                    'status': git_status,
                    'sha': git_sha
                },
                writer,
                indent=4)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e:
        LOGGER.exception(e)
        LOGGER.warning("Git info not found. Saving code into zip instead...")
        # save a copy of the codebase
        # !!! do not store heavy files in your codebase when using this
        code_dir = dirname(dirname(realpath(__file__)))
        code_zip_filename = os.path.join(args.output_dir, "code.zip")
        LOGGER.info(f"Saving code from {code_dir} to {code_zip_filename}...")
        make_zipfile(code_dir,
                     code_zip_filename,
                     enclosing_dir="code",
                     exclude_dirs_substring="results",
                     exclude_dirs=["results", "debug_results", "__pycache__"],
                     exclude_extensions=[".pyc", ".ipynb", ".swap"])
        LOGGER.info("Saving code done.")
Code Example #5
def eval_language_metrics(checkpoint,
                          eval_data_loader,
                          opt,
                          model=None,
                          eval_mode="val"):
    """eval_mode can only be set to `val` here, as setting to `test` is cheating
    0, run inference
    1, Get METEOR, BLEU1-4, CIDEr scores
    2, Get vocab size, sentence length
    """
    translator = Translator(opt, checkpoint, model=model)
    json_res = run_translate(eval_data_loader, translator, opt=opt)
    res_filepath = os.path.abspath(
        opt.save_model + "_tmp_greedy_pred_{}.json".format(eval_mode))
    save_jsonl(json_res, res_filepath)

    # COCO language evaluation
    reference_path = os.path.abspath(opt.reference_path)
    metrics_path = res_filepath.replace(".json", "_lang_metrics.json")
    eval_cmd = [
        "python", "evaluate.py", "-s", res_filepath, "-o", metrics_path, "-r",
        reference_path
    ]
    subprocess.call(eval_cmd, cwd="standalone_eval")
    metrics = load_json(metrics_path)
    return metrics, [res_filepath, metrics_path]
Code Example #6
File: ivcml_data.py Project: zhixinma/HERO
def load_model(opts, device):
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'
    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = (
        "v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')
    return model, model_opts
Code Example #7
def load_external_vr_res2(external_vr_res_path, top_n_vr_videos=5):
    """return a mapping from desc_id to top retrieved video info"""
    external_vr_res = load_json(external_vr_res_path)
    external_vr_res = get_submission_top_n(external_vr_res,
                                           top_n=top_n_vr_videos)["VR"]
    query2video = {e["desc_id"]: e["predictions"] for e in external_vr_res}
    return query2video
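This helper, like the scored variant in Example #10, assumes a TVR-style retrieval result file: entries grouped under a task key ("VR"), each pairing a `desc_id` with a ranked `predictions` list. A hypothetical minimal input is sketched below; the row layout `[video_idx, st, ed, score]` is an assumption inferred from how Examples #10 and #18 index the rows (positions 0 and 3).

external_vr_res_example = {
    "video2idx": {"vid_a": 105, "vid_b": 77},  # assumed to accompany "VR", as in Example #8
    "VR": [
        {"desc_id": 90200,
         "predictions": [[105, 0.0, 0.0, 12.3],  # [video_idx, st, ed, score] (assumed row layout)
                         [77, 0.0, 0.0, 11.8]]},
    ],
}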
Code Example #8
File: mix_model_prediction.py Project: Worm4047/TVR
def load_saved_res(pred_path):
    if pred_path.endswith(".json"):
        pred = load_json(pred_path)
    else:
        pred = torch.load(pred_path)
    vcmr_res = {e["desc_id"]: e for e in pred["VCMR"]}
    video2idx = pred["video2idx"]
    return vcmr_res, video2idx
Code Example #9
File: tvc_dataset.py Project: vyraun/TVCaption
    def __init__(self,
                 ctx_mode,
                 data_path,
                 sub_meta_path,
                 vid_h5_path_or_handler,
                 word2idx_path,
                 max_cap_len,
                 max_v_len,
                 max_sub_len,
                 h5driver=None,
                 clip_length=1.5,
                 normalize_vfeat=True,
                 is_eval=False,
                 data_ratio=1.0):

        self.ctx_mode = ctx_mode
        self.use_video = "video" in ctx_mode
        self.use_sub = "sub" in ctx_mode
        self.is_eval = is_eval
        self.data_ratio = data_ratio
        self.word2idx = load_json(word2idx_path)
        self.idx2word = {int(v): k for k, v in self.word2idx.items()}
        self.clip_length = clip_length
        self.sub_meta_path = sub_meta_path
        self.max_v_len = max_v_len
        self.max_cap_len = max_cap_len  # max caption (sentence) length
        self.max_sub_len = max_sub_len
        self.normalize_vfeat = normalize_vfeat

        if self.use_video:
            if isinstance(vid_h5_path_or_handler, h5py.File):
                self.vid_h5 = vid_h5_path_or_handler
            else:
                self.vid_h5 = h5py.File(vid_h5_path_or_handler,
                                        "r",
                                        driver=h5driver)

        if self.use_sub:
            self.sub_meta_dict = load_process_sub_meta(sub_meta_path,
                                                       clip_length)

        self.data = self._load_data(data_path)
Code Example #10
def load_external_vr_res_with_scores(external_vr_res_path, top_n_vr_videos=5):
    """return a mapping from desc_id to top retrieved (vid_name, score)"""
    external_vr_res = load_json(external_vr_res_path)
    external_vr_res = get_submission_top_n(external_vr_res,
                                           top_n=top_n_vr_videos)["VR"]
    query2video = {
        e["desc_id"]: [[sub_e[0], sub_e[3]] for sub_e in e["predictions"]]
        for e in external_vr_res
    }
    return query2video
Code Example #11
def main(opts):
    if not exists(opts.output):
        os.makedirs(opts.output)
    else:
        raise ValueError('Found existing DB. Please explicitly remove '
                         'for re-processing')
    meta = vars(opts)
    meta['tokenizer'] = opts.toker
    toker = RobertaTokenizer.from_pretrained(opts.toker)
    tokenizer = roberta_tokenize(toker)
    meta['BOS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['EOS'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['SEP'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['CLS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['PAD'] = toker.convert_tokens_to_ids(['<pad>'])[0]
    meta['MASK'] = toker.convert_tokens_to_ids(['<mask>'])[0]
    meta['UNK'] = toker.convert_tokens_to_ids(['<unk>'])[0]
    meta['v_range'] = (toker.convert_tokens_to_ids(['.'])[0],
                       toker.convert_tokens_to_ids(['<|endoftext|>'])[0] + 1)
    save_json(meta, f'{opts.output}/meta.json', save_pretty=True)

    open_db = curry(open_lmdb, opts.output, readonly=False)
    with open_db() as db:
        sub_info_cache_path = f'{opts.output}/sub_info.json'
        try:
            vid2nframe = load_json(opts.vid2nframe)
        except Exception:
            vid2nframe = None
        if not os.path.exists(sub_info_cache_path):
            video2sub_info = load_process_sub_meta(
                opts.annotation, vid2nframe, frame_length=opts.frame_length)
            save_json(video2sub_info, sub_info_cache_path)
        else:
            video2sub_info = load_json(sub_info_cache_path)
        with open(opts.annotation) as ann:
            vid2len, vid2max_frame_sub_len = process_tv_subtitles(
                ann, video2sub_info, db, tokenizer, meta['SEP'])

        save_json(vid2len, f'{opts.output}/vid2len.json')
        save_json(vid2max_frame_sub_len,
                  f'{opts.output}/vid2max_frame_sub_len.json')
Code Example #12
File: start_end_dataset.py Project: Worm4047/TVR
    def __init__(self, dset_name, data_path, desc_bert_path_or_handler, sub_bert_path_or_handler,
                 max_desc_len, max_ctx_len,
                 vid_feat_path_or_handler, clip_length, ctx_mode="video",
                 normalize_vfeat=True, normalize_tfeat=True, h5driver=None, data_ratio=1.0,
                 video_duration_idx_path=None, eval_split_name=None):
        self.dset_name = dset_name
        self.data_path = data_path
        self.data_ratio = data_ratio

        self.desc_bert_path_or_handler = desc_bert_path_or_handler
        self.max_desc_len = max_desc_len

        self.sub_bert_path_or_handler = sub_bert_path_or_handler
        self.max_ctx_len = max_ctx_len
        self.vid_feat_path_or_handler = vid_feat_path_or_handler
        self.clip_length = clip_length
        self.ctx_mode = ctx_mode

        # prepare desc data
        self.data = load_jsonl(data_path)
        if self.data_ratio != 1:
            n_examples = int(len(self.data) * data_ratio)
            self.data = self.data[:n_examples]
            logger.info("Using {}% of the data: {} examples".format(data_ratio * 100, n_examples))

        self.use_video = "video" in self.ctx_mode
        self.use_sub = "sub" in self.ctx_mode
        self.use_tef = "tef" in self.ctx_mode

        if self.use_video:
            if isinstance(vid_feat_path_or_handler, h5py.File):
                self.vid_feat_h5 = vid_feat_path_or_handler
            else:  # str path
                self.vid_feat_h5 = h5py.File(vid_feat_path_or_handler, "r", driver=h5driver)

        if isinstance(desc_bert_path_or_handler, h5py.File):
            self.desc_bert_h5 = desc_bert_path_or_handler
        else:
            self.desc_bert_h5 = h5py.File(desc_bert_path_or_handler, "r", driver=h5driver)

        if self.use_sub:
            if isinstance(sub_bert_path_or_handler, h5py.File):
                self.sub_bert_h5 = sub_bert_path_or_handler
            else:  # str path
                self.sub_bert_h5 = h5py.File(sub_bert_path_or_handler, "r", driver=h5driver)

        self.normalize_vfeat = normalize_vfeat
        self.normalize_tfeat = normalize_tfeat

        if video_duration_idx_path is not None:
            video_data = load_json(video_duration_idx_path)[eval_split_name]
            self.video_data = [{"vid_name": k, "duration": v[0]} for k, v in video_data.items()]
            self.video2idx = {k: v[1] for k, v in video_data.items()}
Code Example #13
File: start_end_dataset.py Project: Worm4047/TVR
    def __init__(self, dset_name, eval_split_name, data_path=None,
                 desc_bert_path_or_handler=None, max_desc_len=None,  max_ctx_len=None,
                 sub_bert_path_or_handler=None, vid_feat_path_or_handler=None,
                 video_duration_idx_path=None, clip_length=None,
                 ctx_mode="video", data_mode="context",
                 h5driver=None, data_ratio=1.0, normalize_vfeat=True, normalize_tfeat=True):
        self.dset_name = dset_name
        self.eval_split_name = eval_split_name
        self.ctx_mode = ctx_mode
        self.load_gt_video = False
        self.data_ratio = data_ratio  # only affects query data
        self.normalize_vfeat = normalize_vfeat
        self.normalize_tfeat = normalize_tfeat

        self.data_mode = None
        self.set_data_mode(data_mode)

        self.max_desc_len = max_desc_len
        self.max_ctx_len = max_ctx_len
        self.data_path = data_path
        self.query_data = load_jsonl(data_path)
        if data_ratio != 1:
            n_examples = int(len(self.query_data) * data_ratio)
            self.query_data = self.query_data[:n_examples]
            logger.info("Using {}% of the data: {} examples".format(data_ratio * 100, n_examples))
        if isinstance(desc_bert_path_or_handler, h5py.File):
            self.desc_bert_h5 = desc_bert_path_or_handler
        else:
            self.desc_bert_h5 = h5py.File(desc_bert_path_or_handler, "r", driver=h5driver)

        video_data = load_json(video_duration_idx_path)[self.eval_split_name]
        self.video_data = {k: v[0] for k, v in video_data.items()}
        self.video2idx = {k: v[1] for k, v in video_data.items()}
        self.clip_length = clip_length

        self.use_video = "video" in self.ctx_mode
        self.use_sub = "sub" in self.ctx_mode
        self.use_tef = "tef" in self.ctx_mode

        if self.use_video:
            if isinstance(vid_feat_path_or_handler, h5py.File):
                self.vid_feat_h5 = vid_feat_path_or_handler
            else:  # str path
                self.vid_feat_h5 = h5py.File(vid_feat_path_or_handler, "r", driver=h5driver)

        if self.use_sub:
            if isinstance(sub_bert_path_or_handler, h5py.File):
                self.sub_bert_h5 = sub_bert_path_or_handler
            else:  # str path
                self.sub_bert_h5 = h5py.File(sub_bert_path_or_handler, "r", driver=h5driver)
Code Example #14
File: ivcml_data.py Project: zhixinma/HERO
def build_dataloader(opts):
    # Load ground truth, query db and video db
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db, (opts.split, opts.query_txt_db)
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)

    eval_dataset = VcmrFullEvalDataset(video_ids,
                                       video_db,
                                       q_txt_db,
                                       distributed=model_opts.distributed_eval)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)
    return eval_dataloader
Code Example #15
def main_convert():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--src_h5_file",
                        type=str,
                        help="subtitle words level feature .h5 file")
    parser.add_argument("--vid_clip_h5_file",
                        type=str,
                        help="video clip level feature .h5 file")
    parser.add_argument("--sub_meta_path",
                        type=str,
                        help="processed subtitle .jsonl path")
    parser.add_argument("--tgt_h5_file",
                        type=str,
                        help=".h5 path to stores the converted data")
    parser.add_argument("--pool_type",
                        type=str,
                        default="max",
                        choices=["max", "avg"],
                        help="how to aggreate frame features")
    parser.add_argument("--clip_length", type=float, default=1.5)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    sub_info_cache_path = args.tgt_h5_file.replace(".h5", "_sub_info.json")
    if not os.path.exists(sub_info_cache_path):
        video2sub_info = load_process_sub_meta(args.sub_meta_path,
                                               clip_length=args.clip_length)
        save_json(video2sub_info, sub_info_cache_path)
    else:
        video2sub_info = load_json(sub_info_cache_path)
    with h5py.File(args.src_h5_file, "r") as src_h5:
        with h5py.File(args.vid_clip_h5_file, "r") as vid_clip_h5:
            with h5py.File(args.tgt_h5_file, "w") as tgt_h5:
                convert_h5(src_h5,
                           vid_clip_h5,
                           tgt_h5,
                           video2sub_info,
                           pool_type=args.pool_type,
                           debug=args.debug)
Code Example #16
File: train_vcmr.py Project: zzzzlalala/HERO
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    opts.n_gpu = n_gpu
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if hvd.rank() != 0:
        LOGGER.disabled = True
    set_random_seed(opts.seed)

    # train_examples = None
    LOGGER.info(f"Loading the whole video dataset {opts.sub_txt_db}, "
                f"{opts.vfeat_db}")
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          opts.vfeat_interval, opts)
    else:
        txt_meta = load_json(join(opts.train_query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           opts.vfeat_interval, opts)

    # data loaders
    # train
    video_ids = get_video_ids(opts.train_query_txt_db)
    train_q_txt_db = QueryTokLmdb(opts.train_query_txt_db, opts.max_txt_len)
    train_dataloaders = build_downstream_dataloaders([opts.task],
                                                     video_db,
                                                     video_ids,
                                                     True,
                                                     opts,
                                                     shuffle=True,
                                                     q_txt_db=train_q_txt_db)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # val
    video_ids = get_video_ids(opts.val_query_txt_db)
    val_q_txt_db = QueryTokLmdb(opts.val_query_txt_db, -1)
    val_dataloaders = build_downstream_dataloaders([opts.task],
                                                   video_db,
                                                   video_ids,
                                                   False,
                                                   opts,
                                                   q_txt_db=val_q_txt_db)

    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    LOGGER.info(f"Loading Inference Dataset {opts.val_query_txt_db} (val)")
    val_dset = inf_dataset(video_ids,
                           video_db,
                           val_q_txt_db,
                           distributed=opts.distributed_eval)
    inf_loader_val = DataLoader(val_dset,
                                batch_size=opts.vcmr_eval_q_batch_size,
                                num_workers=opts.n_workers,
                                pin_memory=opts.pin_mem,
                                collate_fn=vcmr_full_eval_collate)
    inf_loader_val = PrefetchLoader(inf_loader_val)
    if opts.test_query_txt_db:
        LOGGER.info(
            f"Loading Inference Dataset {opts.test_query_txt_db} (test)")
        video_ids = get_video_ids(opts.test_query_txt_db)
        test_q_txt_db = QueryTokLmdb(opts.test_query_txt_db, -1)
        test_dset = inf_dataset(video_ids,
                                video_db,
                                test_q_txt_db,
                                distributed=opts.distributed_eval)
        inf_loader_test = DataLoader(test_dset,
                                     batch_size=opts.vcmr_eval_q_batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=vcmr_full_eval_collate)
        inf_loader_test = PrefetchLoader(inf_loader_test)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForVcmr.from_pretrained(
        opts.model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=opts.lw_neg_ctx,
        lw_neg_q=opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=opts.hard_pool_size,
        margin=opts.margin,
        use_all_neg=opts.use_all_neg,
        drop_svmr_prob=opts.drop_svmr_prob)

    model.to(device)
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16,
                                      opt_level='O2')
    restorer = TrainingRestorer(opts, model, optimizer)
    global_step = restorer.global_step
    TB_LOGGER.global_step = global_step
    if hvd.rank() == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        if not exists(join(opts.output_dir, 'results')):
            # store tvr predictions
            os.makedirs(join(opts.output_dir, 'results'))
        if opts.nms_thd != -1:
            # store tvr-nms predictions
            if not exists(join(opts.output_dir, 'results_nms')):
                os.makedirs(join(opts.output_dir, 'results_nms'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        pbar = NoOp()
        model_saver = NoOp()
        restorer = NoOp()

    if global_step > 0:
        pbar.update(global_step)
    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    task2loss = {
        task: RunningMeter(f'loss/{task}')
        for task in train_dataloaders.keys()
    }

    for obj in (f'{opts.task}_st_ed', f'{opts.task}_neg_ctx',
                f'{opts.task}_neg_q'):
        task2loss[obj] = RunningMeter(f'loss/{obj}')
    model.train()
    n_examples = defaultdict(int)
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    if global_step == 0:
        optimizer.step()
    for step, (task, batch) in enumerate(meta_loader):
        if len(opts.hard_negtiave_start_step) > 0:
            for i, hn_step in enumerate(opts.hard_negtiave_start_step):
                if global_step >= hn_step and hn_step != -1:
                    model.set_hard_negative(True, opts.hard_pool_size[i],
                                            opts.hard_neg_weights[i])
        if opts.train_span_start_step != -1 and\
                global_step >= opts.train_span_start_step:
            model.set_train_st_ed(opts.lw_st_ed)

        n_examples[task] += opts.train_batch_size

        loss = model(batch, task=task, compute_loss=True)

        loss_st_ed, loss_neg_ctx, loss_neg_q = loss
        loss = loss_st_ed + loss_neg_ctx + loss_neg_q
        for n, ls, w in (('st_ed', loss_st_ed, opts.lw_st_ed),
                         ('neg_ctx', loss_neg_ctx, opts.lw_neg_ctx),
                         ('neg_q', loss_neg_q, opts.lw_neg_q)):
            ls = ls.item()
            if w:
                ls /= w
            task2loss[f'{task}_{n}'](ls)

        loss = loss.mean()
        task2loss[task](loss.item())

        delay_unscale = (step + 1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss,
                            optimizer,
                            delay_unscale=delay_unscale,
                            loss_id=task2scaler[task]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from all processes;
                # do this before unscaling so that every process uses
                # the same gradient scale
                grads = [
                    p.grad.data for p in model.parameters()
                    if p.requires_grad and p.grad is not None
                ]
                all_reduce_and_rescale_tensors(grads, float(1))

        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            TB_LOGGER.log_scaler_dict({
                temp_loss.name: temp_loss.val
                for temp_loss in task2loss.values()
                if temp_loss.val is not None
            })
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info('-------------------------------------------')
                LOGGER.info(f'Step {global_step}:')
                for t in train_dataloaders.keys():
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time() - start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)

            if global_step % opts.valid_steps == 0:
                LOGGER.info('===========================================')
                LOGGER.info(f"Step {global_step}: start running validation")
                validate(model, val_dataloaders, opts)
                if hvd.rank() == 0 or opts.distributed_eval:
                    log, results = validate_full_vcmr(model,
                                                      inf_loader_val,
                                                      'val',
                                                      opts,
                                                      model_opts=opts)
                    save_json(
                        results, f'{opts.output_dir}/results/'
                        f'val_results_{global_step}_rank{hvd.rank()}.json')
                    TB_LOGGER.log_scaler_dict(log)
                    if opts.test_query_txt_db:
                        log, results = validate_full_vcmr(model,
                                                          inf_loader_test,
                                                          'test',
                                                          opts,
                                                          model_opts=opts)
                        save_json(
                            results, f'{opts.output_dir}/results/'
                            f'test_results_{global_step}_rank{hvd.rank()}.json'
                        )
                        TB_LOGGER.log_scaler_dict(log)
                LOGGER.info('===========================================')
                model_saver.save(model, global_step)

            # step the restorer at the end to avoid missing a validation checkpoint
            restorer.step()
        if global_step >= opts.num_train_steps:
            break

    LOGGER.info('===========================================')
    if global_step % opts.valid_steps != 0:
        if hvd.rank() == 0 or opts.distributed_eval:
            log, results = validate_full_vcmr(model,
                                              inf_loader_val,
                                              'val',
                                              opts,
                                              model_opts=opts)
            save_json(
                results, f'{opts.output_dir}/results/'
                f'val_results_{global_step}'
                f'_rank{hvd.rank()}_final.json')
            TB_LOGGER.log_scaler_dict(log)
            if opts.test_query_txt_db:
                log, results = validate_full_vcmr(model,
                                                  inf_loader_test,
                                                  'test',
                                                  opts,
                                                  model_opts=opts)
                save_json(
                    results, f'{opts.output_dir}/results/'
                    f'test_results_{global_step}_rank{hvd.rank()}.json')
                TB_LOGGER.log_scaler_dict(log)
    model_saver.save(model, f'{global_step}_final')
Code Example #17
    def parse(self):
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()

        if opt.debug:
            opt.results_root = os.path.sep.join(
                opt.results_root.split(os.path.sep)[:-1] + [
                    "debug_results",
                ])
            opt.no_core_driver = True
            opt.num_workers = 0
            opt.eval_query_bsz = 100

        if isinstance(self, TestOptions):
            # modify model_dir to absolute path
            opt.model_dir = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "results",
                opt.model_dir)
            saved_options = load_json(
                os.path.join(opt.model_dir, self.saved_option_filename))
            for arg in saved_options:  # use saved options to overwrite all BaseOptions args.
                if arg not in [
                        "results_root", "num_workers", "nms_thd", "debug",
                        "eval_split_name", "eval_path", "eval_query_bsz",
                        "eval_context_bsz", "max_pred_l", "min_pred_l",
                        "external_inference_vr_res_path"
                ]:
                    setattr(opt, arg, saved_options[arg])
            # opt.no_core_driver = True
        else:
            if opt.exp_id is None:
                raise ValueError(
                    "--exp_id is required for at a training option!")

            if opt.clip_length is None:
                opt.clip_length = ProposalConfigs[opt.dset_name]["clip_length"]
                print("Loaded clip_length {} from proposal config file".format(
                    opt.clip_length))
            opt.results_dir = os.path.join(
                opt.results_root, "-".join([
                    opt.dset_name, opt.ctx_mode, opt.exp_id,
                    time.strftime("%Y_%m_%d_%H_%M_%S")
                ]))
            mkdirp(opt.results_dir)
            # save a copy of current code
            code_dir = os.path.dirname(os.path.realpath(__file__))
            code_zip_filename = os.path.join(opt.results_dir, "code.zip")
            make_zipfile(
                code_dir,
                code_zip_filename,
                enclosing_dir="code",
                exclude_dirs_substring="results",
                exclude_dirs=["results", "debug_results", "__pycache__"],
                exclude_extensions=[".pyc", ".ipynb", ".swap"],
            )

        self.display_save(opt)

        if "sub" in opt.ctx_mode:
            assert opt.dset_name == "tvr", "sub is only supported for tvr dataset"

        if opt.hard_negtiave_start_epoch != -1:
            if opt.hard_pool_size > opt.bsz:
                print("[WARNING] hard_pool_size is larger than bsz")

        assert opt.stop_task in opt.eval_tasks_at_training
        opt.ckpt_filepath = os.path.join(opt.results_dir, self.ckpt_filename)
        opt.train_log_filepath = os.path.join(opt.results_dir,
                                              self.train_log_filename)
        opt.eval_log_filepath = os.path.join(opt.results_dir,
                                             self.eval_log_filename)
        opt.tensorboard_log_dir = os.path.join(opt.results_dir,
                                               self.tensorboard_log_dir)
        opt.device = torch.device(
            "cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
        opt.h5driver = None if opt.no_core_driver else "core"
        # num_workers > 1 will only work with "core" mode, i.e., memory-mapped hdf5
        opt.num_workers = 1 if opt.no_core_driver else opt.num_workers
        opt.pin_memory = not opt.no_pin_memory

        if "video" in opt.ctx_mode and opt.vid_feat_size > 3000:  # 3072, the normalized concatenation of resnet+i3d
            assert opt.no_norm_vfeat

        if "tef" in opt.ctx_mode and "video" in opt.ctx_mode:
            opt.vid_feat_size += 2
        if "tef" in opt.ctx_mode and "sub" in opt.ctx_mode:
            opt.sub_feat_size += 2

        if "video" not in opt.ctx_mode or "sub" not in opt.ctx_mode:
            opt.no_merge_two_stream = True
            opt.no_cross_att = True

        self.opt = opt
        return opt
Code Example #18
    def __init__(self,
                 dset_name,
                 data_path,
                 desc_bert_path,
                 sub_bert_path,
                 max_desc_len,
                 vid_feat_path,
                 clip_length,
                 vid_feat_size,
                 sub_feat_size=0,
                 ctx_mode="video_tef",
                 pos_iou_thd=0.7,
                 neg_iou_thd=0.3,
                 h5driver=None,
                 data_ratio=1.0,
                 normalize_vfeat=True,
                 normalize_tfeat=True,
                 model_type="cal",
                 external_train_vr_res_path=None,
                 video_duration_idx_path=None):
        self.dset_name = dset_name
        self.model_type = model_type
        self.pool_local = model_type == "mcn"  # pool local feature
        self.data_path = data_path
        self.data_ratio = data_ratio

        self.desc_bert_path = desc_bert_path
        self.max_desc_len = max_desc_len
        self.sub_bert_path = sub_bert_path

        self.vid_feat_path = vid_feat_path
        self.clip_length = clip_length
        self.ctx_mode = ctx_mode

        self.pos_iou_thd = pos_iou_thd
        self.neg_iou_thd = neg_iou_thd

        self.vid_feat_output_size = 2 * vid_feat_size * (
            "video" in ctx_mode) + 2 * ("tef" in ctx_mode)
        self.sub_feat_output_size = 2 * sub_feat_size * (
            "sub" in ctx_mode) + 2 * ("tef" in ctx_mode)

        # prepare desc data
        self.data = load_jsonl(data_path)
        if self.data_ratio != 1:
            n_examples = int(len(self.data) * data_ratio)
            self.data = self.data[:n_examples]
            logger.info("Using {}% of the data: {} examples".format(
                data_ratio * 100, n_examples))

        self.proposal_fn = get_proposal_interface(dset_name)
        if self.ctx_mode != "tef":
            self.vid_feat_h5 = h5py.File(self.vid_feat_path,
                                         "r",
                                         driver=h5driver)
        self.desc_bert_h5 = h5py.File(self.desc_bert_path,
                                      "r",
                                      driver=h5driver)
        if "sub" in self.ctx_mode:
            self.sub_bert_h5 = h5py.File(self.sub_bert_path,
                                         "r",
                                         driver=h5driver)
        self.normalize_vfeat = normalize_vfeat
        self.normalize_tfeat = normalize_tfeat
        self.use_video = "video" in self.ctx_mode
        self.use_sub = "sub" in self.ctx_mode
        self.use_tef = "tef" in self.ctx_mode

        if external_train_vr_res_path is not None:
            video_data = load_json(video_duration_idx_path)["train"]
            # {video_idx: [vid_name, vid_duration]}
            video_idx2name_dur_pair = {
                v[1]: [k, v[0]]
                for k, v in video_data.items()
            }
            external_vr_res = load_json(external_train_vr_res_path)
            # {desc_id: [(vid_name, vid_duration), ...]}
            self.desc_id2video_names_dur_pairs = \
                {e["desc_id"]: [video_idx2name_dur_pair[int(sub_e[0])] for sub_e in e["predictions"]]
                 for e in external_vr_res["VR"]}  # ordered
Code Example #19
File: config.py Project: Worm4047/TVR
    def parse(self):
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()

        if opt.debug:
            opt.results_root = os.path.sep.join(
                opt.results_root.split(os.path.sep)[:-1] + [
                    "debug_results",
                ])
            opt.no_core_driver = True
            opt.num_workers = 0

        if isinstance(self, TestOptions):
            # modify model_dir to absolute path
            opt.model_dir = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "results",
                opt.model_dir)
            saved_options = load_json(
                os.path.join(opt.model_dir, self.saved_option_filename))
            for arg in saved_options:  # use saved options to overwrite all BaseOptions args.
                if arg not in [
                        "results_root", "num_workers", "nms_thd", "debug",
                        "eval_split_name", "eval_path", "use_intermediate",
                        "external_inference_vr_res_path"
                ]:
                    setattr(opt, arg, saved_options[arg])
            # opt.no_core_driver = True
        else:
            if opt.exp_id is None:
                raise ValueError(
                    "--exp_id is required for at a training option!")

            if opt.clip_length is None:
                opt.clip_length = ProposalConfigs[opt.dset_name]["clip_length"]
            opt.results_dir = os.path.join(
                opt.results_root, "-".join([
                    opt.dset_name, opt.model_type, opt.ctx_mode, opt.exp_id,
                    time.strftime("%Y_%m_%d_%H_%M_%S")
                ]))
            mkdirp(opt.results_dir)
            # save a copy of current code
            code_dir = os.path.dirname(os.path.realpath(__file__))
            code_zip_filename = os.path.join(opt.results_dir, "code.zip")
            make_zipfile(
                code_dir,
                code_zip_filename,
                enclosing_dir="code",
                exclude_dirs_substring="results",
                exclude_dirs=["results", "debug_results", "__pycache__"],
                exclude_extensions=[".pyc", ".ipynb", ".swap"])

        self.save_args(opt)

        if "sub" in opt.ctx_mode:
            assert opt.dset_name == "tvr", "sub is only supported for tvr dataset"

        if "video" in opt.ctx_mode and opt.vid_feat_size > 3000:  # 3072, the normalized concatenation of resnet+i3d
            assert opt.no_norm_vfeat

        opt.ckpt_filepath = os.path.join(opt.results_dir, self.ckpt_filename)
        opt.train_log_filepath = os.path.join(opt.results_dir,
                                              self.train_log_filename)
        opt.eval_log_filepath = os.path.join(opt.results_dir,
                                             self.eval_log_filename)
        opt.tensorboard_log_dir = os.path.join(opt.results_dir,
                                               self.tensorboard_log_dir)
        opt.device = torch.device(
            "cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
        opt.h5driver = None if opt.no_core_driver else "core"
        # num_workers > 1 will only work with "core" mode, i.e., memory-mapped hdf5
        opt.pin_memory = not opt.no_pin_memory
        opt.num_workers = 1 if opt.no_core_driver else opt.num_workers

        # Display settings
        print("------------ Options -------------\n{}\n-------------------".
              format({str(k): str(v)
                      for k, v in sorted(vars(opt).items())}))
        self.opt = opt
        return opt
Code Example #20
File: eval_vcmr.py Project: zhixinma/HERO
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, 16-bits training: {}".format(device, n_gpu, hvd.rank(), opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db, model_opts.vfeat_interval, model_opts)
    else:
        txt_meta = load_json(os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta, model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset

    eval_dataset = inf_dataset(video_ids, video_db, q_txt_db, distributed=model_opts.distributed_eval)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q, lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vcmr(model, eval_dataloader, opts.split, opts, model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'

    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results_list = all_gather_list(results)

    if hvd.rank() == 0:  # save only once
        all_results = {"video2idx": all_results_list[0]["video2idx"]}
        for rank_id in range(hvd.size()):
            for key, val in all_results_list[rank_id].items():
                if key == "video2idx":
                    continue
                if key not in all_results:
                    all_results[key] = []
                all_results[key].extend(all_results_list[rank_id][key])
        LOGGER.info('All results joined......')

        # save_vr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vr.json')
        # save_vcmr_base_on_vr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vcmr_base_on_vr.json')
        save_vcmr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vcmr.json')