def main(args):
    # (NOTE)
    # 1: Restricted
    # 3: Low resource
    # 0: CONLL
    track = choice_track(args.track)

    print_log("------ ------ ------")
    print_log(
        f"[Prepare] 1. prepare the text data of track {track.TRACK_NUM}"
    )  # (MODIFIED)
    prepare_text(track)

    print_log("------ ------ ------")
    print_log("[Prepare] 2. create binary data")
    for train_mode in track.train_modes:
        databin_path = track.get_databin_path(train_mode)
        trainpref, validpref = track.get_pref(train_mode)
        prepare_binary(databin_path, trainpref, validpref, track.fp.BPE_VOCAB)
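# `prepare_binary` above binarizes the BPE'd parallel text into a data-bin
# directory. Below is a minimal sketch of what such a helper could look like,
# assuming it wraps the `fairseq-preprocess` CLI and reuses the BPE vocab as a
# joined src/tgt dictionary; the `src`/`tgt` language suffixes and worker count
# are illustrative assumptions, not the repo's actual implementation.
import subprocess


def prepare_binary(databin_path, trainpref, validpref, bpe_vocab,
                   src_lang='src', tgt_lang='tgt', workers=8):
    """Binarize parallel text into a fairseq data-bin directory (sketch)."""
    cmd = [
        'fairseq-preprocess',
        '--source-lang', src_lang,
        '--target-lang', tgt_lang,
        '--trainpref', trainpref,   # e.g. <trainpref>.src / <trainpref>.tgt
        '--validpref', validpref,
        '--destdir', databin_path,
        '--srcdict', bpe_vocab,     # assumed: shared BPE vocab on both sides
        '--tgtdict', bpe_vocab,
        '--workers', str(workers),
    ]
    subprocess.run(cmd, check=True)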
def main(args):
    track = choice_track(args.track)
    assert args.train_mode in track.train_modes
    if args.train_mode == 'pretrain':
        assert args.prev_model_output_dir is None

    # (NOTE) Data used for training.
    databin_path = track.get_databin_path(args.train_mode)
    # (NOTE) Config.
    model_config = track.get_model_config(args.model, args.lr, args.dropout,
                                          args.max_epoch, args.seed,
                                          args.reset)
    # (NOTE) NAME of the checkpoint dir.
    ckpt_dir = track.get_ckpt_dir(args.train_mode, args.model, args.lr,
                                  args.dropout, args.seed,
                                  args.prev_model_output_dir)
    # (NOTE) Path of the checkpoint to be restored.
    _, ori_path, _, _, scorer_type = track.get_subset_datapath('valid')
    fscore, restore_ckpt = find_restore(args.prev_model_output_dir, ori_path,
                                        scorer_type)
    train(databin_path, model_config, ckpt_dir, restore_ckpt, args.ngpu)
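# `find_restore` above decides which checkpoint to warm-start fine-tuning from.
# A hypothetical sketch, assuming each previously evaluated checkpoint left a
# `<checkpoint>.report` file whose last line is its F-score; the real helper
# presumably also uses `ori_path` and `scorer_type` to select the right
# reports, which this sketch ignores.
import glob
import os


def find_restore(prev_model_output_dir, ori_path, scorer_type):
    """Return (best fscore, checkpoint to restore); (None, None) for pretraining."""
    if prev_model_output_dir is None:
        return None, None
    best_fscore, best_ckpt = float('-inf'), None
    for report in glob.glob(os.path.join(prev_model_output_dir, '*.report')):
        with open(report) as f:
            lines = [line.strip() for line in f if line.strip()]
        fscore = float(lines[-1])  # assumed layout: last line holds the F-score
        ckpt = os.path.basename(report).replace('.report', '.pt')
        if fscore > best_fscore:
            best_fscore, best_ckpt = fscore, ckpt
    return best_fscore, best_ckpt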
def main(args):
    track = choice_track(args.track)
    assert args.subset in track.subsets
    assert bool(args.ckpt_dir) ^ bool(args.ckpt_fpath)
    if args.find_best:
        assert bool(args.ckpt_dir)

    databin_path = track.get_databin_path('pretrain')
    # (NOTE) gold_m2: trg.
    # (NOTE) ori_path: src.
    gold_m2, ori_path, ori_bpe_path, gen_subset, scorer_type = track.get_subset_datapath(
        args.subset)

    # ckpt_dir
    if args.ckpt_dir is not None:
        # (NOTE) checkpoint_best.pt and checkpoint_last.pt are not included.
        ckpt_files = util.get_sorted_ckpts(args.ckpt_dir)
        # (NOTE) Path where the eval results are stored.
        output_dir = track.get_output_dir(args.ckpt_dir)
    # ckpt_fpath
    else:
        ckpt_files = [args.ckpt_fpath]
        # (NOTE) Path where the eval results are stored.
        output_dir = track.get_output_dir(args.ckpt_fpath)

    if not args.find_best:
        # (NOTE) Evaluates every checkpoint under `ckpt_dir` (checkpoint1.pt, checkpoint2.pt, ...),
        # (NOTE) or just the single checkpoint given as `ckpt_fpath`.
        for ckpt in tqdm(ckpt_files):
            run_ckpt(databin_path, ckpt, output_dir, scorer_type, gold_m2,
                     ori_path, ori_bpe_path, gen_subset, args.remove_unk_edits,
                     args.remove_error_type_lst, args.apply_rerank,
                     args.preserve_spell, args.max_edits)

    logging.info(f"[Evaluate] highest score on {ori_path}")
    # (NOTE) Finds the best fscore and checkpoint from all the *.report files.
    find_best(output_dir, ori_path, scorer_type)
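# `util.get_sorted_ckpts` above returns the epoch checkpoints to evaluate. A
# minimal sketch consistent with the note that checkpoint_best.pt and
# checkpoint_last.pt are excluded: keep only `checkpoint<N>.pt` files and sort
# them by epoch number (the actual helper in `util` may differ).
import glob
import os
import re


def get_sorted_ckpts(ckpt_dir):
    """Return checkpoint1.pt, checkpoint2.pt, ... sorted by epoch number."""
    numbered = re.compile(r'^checkpoint(\d+)\.pt$')
    ckpts = []
    for path in glob.glob(os.path.join(ckpt_dir, 'checkpoint*.pt')):
        m = numbered.match(os.path.basename(path))
        if m:  # skips checkpoint_best.pt / checkpoint_last.pt
            ckpts.append((int(m.group(1)), path))
    return [path for _, path in sorted(ckpts)]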