decoder = nemo_nlp.TransformerDecoderNM(  # constructor name assumed; snippet begins mid-call
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=tgt_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length)

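# Output projection: a one-layer TokenClassifier maps decoder states (d_model) to
# log-probabilities over the target vocabulary.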
log_softmax = nemo_nlp.TokenClassifier(args.d_model,
                                       num_classes=tgt_tokenizer.vocab_size,
                                       num_layers=1,
                                       log_softmax=True)

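# Beam-search translator reuses the decoder and the log-softmax head for inference-time decoding.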
beam_search = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=args.max_seq_length,
    beam_size=args.beam_size,
    bos_token=tgt_tokenizer.bos_id(),
    pad_token=tgt_tokenizer.pad_id(),
    eos_token=tgt_tokenizer.eos_id())

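# Cross-entropy loss that ignores padded positions, with optional label smoothing.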
loss_fn = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    pad_id=tgt_tokenizer.pad_id(), label_smoothing=args.label_smoothing)

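# Optionally share one token-embedding matrix between the encoder, the decoder,
# and the output projection.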
if tie_weight:
    log_softmax.mlp.last_linear_layer.weight = \
        encoder.embedding_layer.token_embedding.weight
    decoder.embedding_layer.token_embedding.weight = \
        encoder.embedding_layer.token_embedding.weight


def create_pipeline(dataset_src,

Example 2

decoder = nemo_nlp.TransformerDecoderNM(  # constructor assumed; earlier arguments not shown in the original
    learn_positional_encodings=True,
    hidden_act="gelu",
    **dec_first_sublayer_params)

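# Warm-start the decoder from a previously saved checkpoint.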
decoder.restore_from(args.restore_from, local_rank=args.local_rank)

t_log_softmax = nemo_nlp.TokenClassifier(args.d_model,
                                         num_classes=vocab_size,
                                         num_layers=1,
                                         log_softmax=True)

beam_translator = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=t_log_softmax,
    max_seq_length=args.max_seq_length,
    beam_size=args.beam_size,
    length_penalty=args.len_pen,
    bos_token=tokenizer.bos_id(),
    pad_token=tokenizer.pad_id(),
    eos_token=tokenizer.eos_id())

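# Label-smoothed loss for training; a separate un-smoothed (smoothing=0.0) loss for evaluation.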
loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=0, smoothing=0.1)

loss_eval = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=0, smoothing=0.0)

# tie all embedding weights
t_log_softmax.mlp.last_linear_layer.weight = \
    encoder.bert.embeddings.word_embeddings.weight
decoder.embedding_layer.token_embedding.weight = \
    encoder.bert.embeddings.word_embeddings.weight
decoder.embedding_layer.position_embedding.weight = \
    encoder.bert.embeddings.position_embeddings.weight  # RHS assumed; the original line is cut off

Example 3

encoder = nemo_nlp.TransformerEncoderNM(  # encoder constructor assumed; snippet starts mid-call
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=max_sequence_length,
    embedding_dropout=args.embedding_dropout,
    share_all_layers=args.share_encoder_layers,
    hidden_act="gelu")

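# Log-softmax output head; d_embedding presumably lets the vocabulary projection
# use a smaller dimension than d_model.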
log_softmax = nemo_nlp.TransformerLogSoftmaxNM(factory=neural_factory,
                                               vocab_size=vocab_size,
                                               d_model=args.d_model,
                                               d_embedding=args.d_embedding)

beam_translator = nemo_nlp.BeamSearchTranslatorNM(
    factory=neural_factory,
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=max_sequence_length,
    beam_size=args.beam_size,
    length_penalty=args.len_pen,
    bos_token=tokenizer.bos_id(),
    pad_token=tokenizer.pad_id(),
    eos_token=tokenizer.eos_id())

loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(factory=neural_factory,
                                                 pad_id=tokenizer.pad_id(),
                                                 smoothing=0.1)

loss_eval = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(
    factory=neural_factory, pad_id=tokenizer.pad_id(), smoothing=0.0)

# tie weights of the embedding and log_softmax layers
if args.tie_enc_dec:
    decoder.embedding_layer.token_embedding.weight = \
        encoder.embedding_layer.token_embedding.weight  # RHS assumed; the original snippet ends mid-assignment