Example no. 1
0
def main():
    """Train a bidirectional-GRU seq2seq model on the E2E dataset.

    Configuration: "inc_freq" linearization strategy, lexicalized data
    (is_delex=False), and training data augmented with phrase-level
    examples (base+phrases).  The evaluation script path is read from
    the MRT_EVAL_SCRIPT environment variable; outputs are saved under
    the ``output_dir`` given on the command line.
    """
    # NOTE: the description previously read "Train Viggo transformer model",
    # a copy/paste error -- this script trains an E2E bi-GRU (see
    # setup_model("gru", "bi", ...) and the "E2E/bi-gru/..." run name below).
    parser = argparse.ArgumentParser(
        "Train E2E bi-GRU model (ls=inc freq, da=base+phrases)")
    parser.add_argument("output_dir", type=Path, help="save directory")
    parser.add_argument("--seed", type=int, default=234222452)
    parser.add_argument("--layers", default=2, type=int, help="num layers")
    parser.add_argument("--ls", default=0.1, type=float,
                        help="label smoothing")
    parser.add_argument("--wd", default=0.0, type=float,
                        help="weight decay")
    parser.add_argument("--opt", choices=["adam", "sgd"], default="sgd",
                        help="optimizer")
    parser.add_argument("--lr", default=0.5, type=float,
                        help="learning rate")
    parser.add_argument("--tie-embeddings", action="store_true",
                        help="share decoder input/output embeddings")
    parser.add_argument("--attn", choices=["bahdanau", "luong-general"],
                        default="luong-general", help="attention type")
    parser.add_argument("--max-epochs", default=100, type=int,
                        help="max training epochs")
    parser.add_argument("--gpu", default=-1, type=int)
    parser.add_argument("--n-procs", default=2, type=int,
                        help="num data loader processes")
    parser.add_argument("--tr-batch-size", default=128, type=int,
                        help="training batch size")
    parser.add_argument("--va-batch-size", default=1, type=int,
                        help="valid batch size")
    args = parser.parse_args()

    seeds(args.seed)

    # Fail fast with an explicit error: an assert would be silently stripped
    # when Python runs with -O, letting a None path reach the trainer.
    eval_script = os.getenv("MRT_EVAL_SCRIPT")
    if eval_script is None:
        raise RuntimeError("MRT_EVAL_SCRIPT environment variable is not set")

    dataset = "E2E"
    lin_strat = "inc_freq"
    is_delex = False

    # Vocabularies and datasets; training data additionally includes
    # phrase-level augmentation (the "base+phrases" condition).
    mr_vcb, utt_vcb = setup_vocab(dataset, lin_strat, is_delex)
    tr_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb,
                                include_phrases=True)
    va_ds = setup_validation_data(dataset, lin_strat, is_delex,
                                  mr_vcb, utt_vcb)

    tr_batches = make_batches(tr_ds, args.tr_batch_size, args.n_procs,
                              lin_strat, is_delex)
    va_batches = make_batches(va_ds, args.va_batch_size, args.n_procs,
                              lin_strat, is_delex)

    # Bidirectional GRU with the selected attention; beam search width 8.
    model = setup_model("gru", "bi", args.layers, args.attn,
                        args.tie_embeddings, mr_vcb, utt_vcb, beam_size=8)

    trainer = setup_trainer(model, args.opt, args.lr, args.wd, args.ls,
                            tr_batches, va_batches, args.max_epochs,
                            utt_vcb, eval_script, mr_utils,
                            lambda: f"E2E/bi-gru/if/base+phrases/{args.seed}")

    env = {"proj_dir": args.output_dir, "gpu": args.gpu}
    trainer.run(env, verbose=True)
Example no. 2
0
def main():
    """Train a Viggo transformer (inc-freq linearization, base+phrases data).

    The training set is delexicalized (is_delex=True) and augmented with
    phrase examples; the optimizer schedule passed to the trainer is
    "adamtri".  The evaluation script path comes from the MRT_EVAL_SCRIPT
    environment variable.
    """
    p = argparse.ArgumentParser(
        "Train Viggo transformer model (ls=inc freq, da=base+phrases)")
    p.add_argument("output_dir", type=Path, help="save directory")
    p.add_argument("--seed", type=int, default=234222452)
    p.add_argument("--layers", default=2, type=int, help="num layers")
    p.add_argument("--ls", default=0.1, type=float,
                   help="label smoothing")
    p.add_argument("--tie-embeddings", action="store_true",
                   help="share decoder input/output embeddings")
    p.add_argument("--max-epochs", default=100, type=int,
                   help="max training epochs")
    p.add_argument("--gpu", default=-1, type=int)
    p.add_argument("--n-procs", default=2, type=int,
                   help="num data loader processes")
    p.add_argument("--tr-batch-size", default=128, type=int,
                   help="training batch size")
    p.add_argument("--va-batch-size", default=1, type=int,
                   help="valid batch size")
    args = p.parse_args()

    seeds(args.seed)
    assert os.getenv("MRT_EVAL_SCRIPT") is not None
    eval_script = os.getenv("MRT_EVAL_SCRIPT")

    # Fixed experimental condition for this script.
    dataset, lin_strat, is_delex = "Viggo", "inc_freq", True

    mr_vocab, utt_vocab = setup_vocab(dataset, lin_strat, is_delex)
    train_set = setup_training_data(dataset, lin_strat, is_delex,
                                    mr_vocab, utt_vocab,
                                    include_phrases=True)
    valid_set = setup_validation_data(dataset, lin_strat, is_delex,
                                      mr_vocab, utt_vocab)

    train_batches = make_batches(train_set, args.tr_batch_size,
                                 args.n_procs, lin_strat, is_delex)
    valid_batches = make_batches(valid_set, args.va_batch_size,
                                 args.n_procs, lin_strat, is_delex)

    # Transformer model: the RNN direction/attention slots are None here;
    # decoding uses a beam of width 8.
    model = setup_model("transformer", None, args.layers, None,
                        args.tie_embeddings, mr_vocab, utt_vocab,
                        beam_size=8)

    def run_name():
        return f"Viggo/transformer/if/base+phrases/{args.seed}"

    trainer = setup_trainer(model, "adamtri", None, None, args.ls,
                            train_batches, valid_batches, args.max_epochs,
                            utt_vocab, eval_script, mr_utils, run_name)

    env = {"proj_dir": args.output_dir, "gpu": args.gpu}
    trainer.run(env, verbose=True)
Example no. 3
0
# Validation split for the configured dataset/linearization/delex setting.
# (dataset, lin_strat, is_delex, mr_vcb, utt_vcb and the tr_* datasets are
# defined earlier in this script, outside the visible excerpt.)
va_ds = setup_validation_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb)

# Training batch iterators: the base data plus phrase-, template-, and
# phrase+template-augmented variants, all batch size 128 with WKRS workers.
tr_batches = make_batches(tr_ds, 128, WKRS, lin_strat, is_delex)
tr_phrase_batches = make_batches(tr_phrase_ds, 128, WKRS, lin_strat, is_delex)
tr_template_batches = make_batches(tr_templ_ds, 128, WKRS, lin_strat, is_delex)
tr_phrase_template_batches = make_batches(tr_phrase_templ_ds, 128, WKRS,
                                          lin_strat, is_delex)

# Validation batch size depends on the model: 1 for transformers, 64
# otherwise -- presumably a decoding/memory constraint; TODO confirm
# against make_batches / the evaluation loop.
va_batches = make_batches(va_ds, 1 if model_type == 'transformer' else 64,
                          WKRS, lin_strat, is_delex)

# Build the model; beam search width 8.  rnn_dir/rnn_attn look RNN-specific
# and may be ignored for other model types -- verify in setup_model.
model = setup_model(model_type,
                    rnn_dir,
                    L,
                    rnn_attn,
                    tie_dec_emb,
                    mr_vcb,
                    utt_vcb,
                    beam_size=8)

# Pick the dataset-specific mr_utils helper module; any other dataset name
# is a configuration error.
if dataset == 'Viggo':
    mr_utils = mrt.viggo.mr_utils
elif dataset == 'E2E':
    mr_utils = mrt.e2e.mr_utils
else:
    raise Exception(f"Bad dataset: {dataset}")

def make_template():
    if model_type in ['gru', 'lstm']:
        TEMPLATE = "rnn/{lin_strat}_{delex}/{arch}_{dir}_L={layers}_attn={attn}_tied={tied}_{opt}_lr={lr}_wd={wd}_ls={ls}"
Example no. 4
0
# Experiment hyperparameters.  HP appears to wrap a name/value/description
# triple (and is accepted wherever a plain value is, e.g. WKRS below) --
# confirm against HP's definition.
WKRS = HP('WKRS', 2, description='data loader processes')
T = HP('T', 500, description='max training epochs')
LR = HP('LR', 0.1, description='learning rate')
WD = HP('WD', 0.0, description='weight decay')
LS = HP('LS', 0.1, description='label smoothing')
L = HP('L', 2, description='num layers')

# Vocabularies and train/validation datasets for the configured setting.
# (dataset, lin_strat, is_delex come from earlier in this script.)
mr_vcb, utt_vcb = setup_vocab(dataset, lin_strat, is_delex)
tr_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb)
va_ds = setup_validation_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb)

# Batch iterators; both splits use batch size 128 here.
tr_batches = make_batches(tr_ds, 128, WKRS, lin_strat, is_delex)
va_batches = make_batches(va_ds, 128, WKRS, lin_strat, is_delex)

# Build the model.  no_posemb/no_encoder look like ablation switches
# (drop positional embeddings / drop the encoder) -- TODO confirm in
# setup_model.
model = setup_model(model_type, rnn_dir, L, rnn_attn, tie_dec_emb,
                    mr_vcb, utt_vcb, no_posemb=no_posemb,
                    no_encoder=no_encoder)

# Pick the dataset-specific mr_utils helper module; any other dataset name
# is a configuration error.
if dataset == 'Viggo':
    mr_utils = mrt.viggo.mr_utils
elif dataset == 'E2E':
    mr_utils = mrt.e2e.mr_utils
else:
    raise Exception(f"Bad dataset: {dataset}")

def make_template():
    if model_type in ['gru', 'lstm']:
        TEMPLATE = "{no_enc}rnn/{lin_strat}_{delex}/{arch}_{dir}_L={layers}_attn={attn}_tied={tied}_{opt}_lr={lr}_wd={wd}_ls={ls}"
        return TEMPLATE.format(
            lin_strat=lin_strat, delex='delex' if is_delex else 'lex', 
            dir=rnn_dir,