Example #1
    tokens_in_batch=args.batch_size,
    clean=True)
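# evaluation data layer: the same tokenizer is used for source and target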
eval_data_layer = nemo_nlp.TranslationDataLayer(
    factory=neural_factory,
    tokenizer_src=tokenizer,
    tokenizer_tgt=tokenizer,
    dataset_src=f"{args.data_root}/{args.eval_datasets[0]}.{args.src_lang}",
    dataset_tgt=f"{args.data_root}/{args.eval_datasets[0]}.{args.tgt_lang}",
    tokens_in_batch=args.eval_batch_size)
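# Transformer encoder sized from the command-line hyperparameters and the tokenizer's vocabulary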
encoder = nemo_nlp.TransformerEncoderNM(
    factory=neural_factory,
    d_embedding=args.d_embedding,
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_sequence_length,
    share_all_layers=args.share_encoder_layers)
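# decoder mirrors the encoder's embedding size, model width, layer count and dropout settings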
decoder = nemo_nlp.TransformerDecoderNM(
    factory=neural_factory,
    d_embedding=args.d_embedding,
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
Example #2
    # source and target use different tokenizers, set tie_weight to False
    tie_weight = False
else:
    nf.logger.info(
        f"Unsupported language pair:{args.src_lang}-{args.tgt_lang}.")
    exit(1)
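# tie_weight marks whether the source and target embedding matrices (and,
# typically, the output projection) may share weights; this is only possible
# when both sides use the same tokenizer and vocabulary, so it is disabled
# for language pairs with separate tokenizers.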

# instantiate necessary modules for the whole translation pipeline, namely
# data layers, encoder, decoder, output log_softmax, beam_search_translator
# and loss function
encoder = nemo_nlp.TransformerEncoderNM(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=src_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length)

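# the decoder repeats the encoder's stack configuration but uses the target-side vocabulary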
decoder = nemo_nlp.TransformerDecoderNM(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=tgt_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
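# --- Not part of the excerpt above: a minimal sketch of how the modules named
# --- in the pipeline comment (data layer, encoder, decoder, output log_softmax,
# --- beam_search_translator, loss function) are typically chained into a DAG.
# --- The names train_data_layer, log_softmax and loss_fn, and every port/keyword
# --- name below, are assumptions for illustration, not taken from the excerpt;
# --- check them against the installed NeMo version.
src, src_mask, tgt, tgt_mask, labels, sent_ids = train_data_layer()
src_hiddens = encoder(input_ids=src, input_mask_src=src_mask)
tgt_hiddens = decoder(input_ids_tgt=tgt,
                      hidden_states_src=src_hiddens,
                      input_mask_src=src_mask,
                      input_mask_tgt=tgt_mask)
log_probs = log_softmax(hidden_states=tgt_hiddens)      # per-token log-probabilities
loss = loss_fn(log_probs=log_probs, target_ids=labels)  # training loss over target tokens
# at evaluation time, the beam-search translator would consume src_hiddens and
# src_mask instead of the greedy decoding path above
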
Example #3
        learn_positional_encodings=True,
        hidden_act="gelu")

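    # optionally warm-start the decoder from a previously saved checkpoint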
    if args.restore_decoder:
        decoder.restore_from(args.decoder_restore_from,
                             local_rank=args.local_rank)

elif args.encoder == "nemo":
    encoder = nemo_nlp.TransformerEncoderNM(
        factory=neural_factory,
        d_embedding=args.d_embedding,
        d_model=args.d_model,
        d_inner=args.d_inner,
        num_layers=args.num_layers,
        num_attn_heads=args.num_heads,
        ffn_dropout=args.ffn_dropout,
        vocab_size=vocab_size,
        attn_score_dropout=args.attn_score_dropout,
        attn_layer_dropout=args.attn_layer_dropout,
        max_seq_length=max_sequence_length,
        embedding_dropout=args.embedding_dropout,
        share_all_layers=args.share_decoder_layers,
        learn_positional_encodings=True,
        hidden_act="gelu")

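    # load the pretrained encoder weights from the given checkpoint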
    encoder.restore_from(args.encoder_restore_from, local_rank=args.local_rank)

    decoder = nemo_nlp.TransformerDecoderNM(
        factory=neural_factory,
        d_embedding=args.d_embedding,
        d_model=args.d_model,
        d_inner=args.d_inner,