Example #1
def main(args):
    track = choice_track(args.track)

    assert args.subset in track.subsets
    assert bool(args.ckpt_dir) ^ bool(args.ckpt_fpath)
    if args.find_best:
        assert bool(args.ckpt_dir)

    databin_path = track.get_databin_path('pretrain')
    gold_m2, ori_path, ori_bpe_path, gen_subset, scorer_type = track.get_subset_datapath(
        args.subset)

    # ckpt_dir
    if args.ckpt_dir is not None:
        ckpt_files = util.get_sorted_ckpts(args.ckpt_dir)
        output_dir = track.get_output_dir(args.ckpt_dir)

    # ckpt_fpath
    else:
        ckpt_files = [args.ckpt_fpath]
        output_dir = track.get_output_dir(args.ckpt_fpath)

    if not args.find_best:
        for ckpt in tqdm(ckpt_files):
            run_ckpt(databin_path, ckpt, output_dir, scorer_type, gold_m2,
                     ori_path, ori_bpe_path, gen_subset, args.remove_unk_edits,
                     args.remove_error_type_lst, args.apply_rerank,
                     args.preserve_spell, args.max_edits)

    logging.info(f"[Evaluate] highest score on {ori_path}")
    find_best(output_dir, ori_path, scorer_type)
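The main() above pulls everything it needs from an argparse-style args namespace. The sketch below is a minimal, hypothetical parser that would satisfy it: only the attribute names (track, subset, ckpt_dir, ckpt_fpath, find_best, remove_unk_edits, remove_error_type_lst, apply_rerank, preserve_spell, max_edits) are taken from the code above; the flag spellings, types, and defaults are assumptions, not the project's actual CLI.

import argparse

def build_eval_parser():
    # Hypothetical CLI for the evaluation main() above; flag names and defaults are assumed.
    parser = argparse.ArgumentParser(description="Evaluate GEC checkpoints")
    parser.add_argument("--track", required=True, help="track id passed to choice_track")
    parser.add_argument("--subset", required=True, help="must be one of track.subsets, e.g. 'valid'")
    # Exactly one of --ckpt-dir / --ckpt-fpath must be given (enforced by the XOR assert in main()).
    parser.add_argument("--ckpt-dir", dest="ckpt_dir", default=None)
    parser.add_argument("--ckpt-fpath", dest="ckpt_fpath", default=None)
    parser.add_argument("--find-best", dest="find_best", action="store_true")
    parser.add_argument("--remove-unk-edits", dest="remove_unk_edits", action="store_true")
    parser.add_argument("--remove-error-type-lst", dest="remove_error_type_lst", nargs="*", default=[])
    parser.add_argument("--apply-rerank", dest="apply_rerank", action="store_true")
    parser.add_argument("--preserve-spell", dest="preserve_spell", action="store_true")
    parser.add_argument("--max-edits", dest="max_edits", type=int, default=None)
    return parser

if __name__ == "__main__":
    # main() refers to the function shown in the example above.
    main(build_eval_parser().parse_args())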
Example #2
def main(args):
    track = choice_track(args.track)

    prepare_text(track)

    for train_mode in track.train_modes:
        databin_path = track.get_databin_path(train_mode)
        trainpref, validpref = track.get_pref(train_mode)
        prepare_binary(databin_path, trainpref, validpref, track.fp.BPE_VOCAB)
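This main() only assumes a small interface on the object returned by choice_track. The stub below is a hypothetical stand-in that shows which attributes and methods the loop relies on (train_modes, get_databin_path, get_pref, fp.BPE_VOCAB); the concrete paths and mode names are made up for illustration.

from dataclasses import dataclass, field
from typing import List, Tuple

@dataclass
class _FilePaths:
    # Assumed location of the BPE vocabulary; the real path lives in the track's file-path config.
    BPE_VOCAB: str = "data/bpe/vocab.txt"

@dataclass
class TrackStub:
    # Hypothetical stand-in for the object returned by choice_track().
    train_modes: List[str] = field(default_factory=lambda: ["pretrain", "train", "finetune"])
    fp: _FilePaths = field(default_factory=_FilePaths)

    def get_databin_path(self, train_mode: str) -> str:
        return f"data/bin/{train_mode}"

    def get_pref(self, train_mode: str) -> Tuple[str, str]:
        # (train prefix, valid prefix), exactly the pair unpacked in the loop above.
        return f"data/{train_mode}/train", f"data/{train_mode}/valid"

if __name__ == "__main__":
    track = TrackStub()
    for train_mode in track.train_modes:
        print(track.get_databin_path(train_mode), track.get_pref(train_mode))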
Example #3
def main(args):
    track = choice_track(args.track)

    assert args.train_mode in track.train_modes
    if args.train_mode == 'pretrain':
        assert args.prev_model_output_dir is None

    databin_path = track.get_databin_path(args.train_mode)
    model_config = track.get_model_config(args.model, args.lr, args.dropout, args.max_epoch, args.seed, args.reset)
    ckpt_dir = track.get_ckpt_dir(args.train_mode, args.model, args.lr, args.dropout, args.seed,
                                  args.prev_model_output_dir)

    _, ori_path, _, _, scorer_type = track.get_subset_datapath('valid')
    fscore, restore_ckpt = find_restore(args.prev_model_output_dir, ori_path, scorer_type)

    train(databin_path, model_config, ckpt_dir, restore_ckpt, args.ngpu)
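As with the evaluation example, the training main() reads its hyperparameters and paths from an args namespace. Below is a minimal, hypothetical parser for it; only the attribute names come from the code above, and every flag spelling, type, and default is an assumption.

import argparse

def build_train_parser():
    # Hypothetical CLI for the training main() above; flag names and defaults are assumed.
    parser = argparse.ArgumentParser(description="Train a GEC model for a track")
    parser.add_argument("--track", required=True, help="track id passed to choice_track")
    parser.add_argument("--train-mode", dest="train_mode", required=True,
                        help="must be one of track.train_modes, e.g. 'pretrain'")
    parser.add_argument("--model", required=True)
    parser.add_argument("--lr", type=float, default=5e-4)
    parser.add_argument("--dropout", type=float, default=0.3)
    parser.add_argument("--max-epoch", dest="max_epoch", type=int, default=10)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--reset", action="store_true")
    parser.add_argument("--ngpu", type=int, default=1)
    # Must be omitted when --train-mode is 'pretrain' (see the assert in main()).
    parser.add_argument("--prev-model-output-dir", dest="prev_model_output_dir", default=None)
    return parser

if __name__ == "__main__":
    # main() refers to the function shown in the example above.
    main(build_train_parser().parse_args())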
Example #4
def main(args):
    # (NOTE)
    # 1: Restricted
    # 3: Low resource
    # 0: CONLL
    track = choice_track(args.track)

    print_log("------ ------ ------")
    print_log(
        f"[Prepare] 1. prepare for the text data of track {track.TRACK_NUM}"
    )  # (MODIFIED)
    prepare_text(track)

    print_log("------ ------ ------")
    print_log(f"[Prepare] 2. create binary data")
    for train_mode in track.train_modes:
        databin_path = track.get_databin_path(train_mode)
        trainpref, validpref = track.get_pref(train_mode)
        prepare_binary(databin_path, trainpref, validpref, track.fp.BPE_VOCAB)
Example #5
def main(args):
    track = choice_track(args.track)

    assert args.train_mode in track.train_modes
    if args.train_mode == 'pretrain':
        assert args.prev_model_output_dir is None

    # (NOTE) Data used for training.
    databin_path = track.get_databin_path(args.train_mode)
    # (NOTE) Config.
    model_config = track.get_model_config(args.model, args.lr, args.dropout, args.max_epoch, args.seed, args.reset)
    # (NOTE) NAME of the checkpoint dir.
    ckpt_dir = track.get_ckpt_dir(args.train_mode, args.model, args.lr, args.dropout, args.seed,
                                  args.prev_model_output_dir)
    # (NOTE) Path of the checkpoint to be restored.
    _, ori_path, _, _, scorer_type = track.get_subset_datapath('valid')
    fscore, restore_ckpt = find_restore(args.prev_model_output_dir, ori_path, scorer_type)

    train(databin_path, model_config, ckpt_dir, restore_ckpt, args.ngpu)
Example #6
def main(args):
    track = choice_track(args.track)

    assert args.subset in track.subsets
    assert bool(args.ckpt_dir) ^ bool(args.ckpt_fpath)
    if args.find_best:
        assert bool(args.ckpt_dir)

    databin_path = track.get_databin_path('pretrain')
    # (NOTE) gold_m2: trg.
    # (NOTE) ori_path: src.
    gold_m2, ori_path, ori_bpe_path, gen_subset, scorer_type = track.get_subset_datapath(
        args.subset)

    # ckpt_dir
    if args.ckpt_dir is not None:
        # (NOTE) checkpoint_best.pt and checkpoint_last.pt are not included.
        ckpt_files = util.get_sorted_ckpts(args.ckpt_dir)
        # (NOTE) Path of the eval rsts to be stored.
        output_dir = track.get_output_dir(args.ckpt_dir)
    # ckpt_fpath
    else:
        # (NOTE) checkpoint_best.pt and checkpoint_last.pt are not included.
        ckpt_files = [args.ckpt_fpath]
        # (NOTE) Path of the eval rsts to be stored.
        output_dir = track.get_output_dir(args.ckpt_fpath)

    if not args.find_best:
        # (NOTE) Evals all checkpoints for `ckpt_dir` (checkpoint1.pt, checkpoint2.pt, ... ),
        # (NOTE) or the checkpoint file for `ckpt_fpath`.
        for ckpt in tqdm(ckpt_files):
            run_ckpt(databin_path, ckpt, output_dir, scorer_type, gold_m2,
                     ori_path, ori_bpe_path, gen_subset, args.remove_unk_edits,
                     args.remove_error_type_lst, args.apply_rerank,
                     args.preserve_spell, args.max_edits)

    logging.info(f"[Evaluate] highest score on {ori_path}")
    # (NOTE) Finds the best fscore and checkpoint from all the *.report.
    find_best(output_dir, ori_path, scorer_type)