Example #1
0
# Build the BERT encoder from the pretrained model named on the command line.
encoder = nemo_nlp.huggingface.BERT(
    pretrained_model_name=args.pretrained_model, local_rank=args.local_rank)

# Append `tokens_to_add` all-zero rows to the word-embedding matrix.
# The padding is created with the device *and* dtype of the existing weights:
#   * `Tensor.get_device()` returns -1 for CPU tensors, which is not a valid
#     argument for `.to(device=...)`, so `Tensor.device` is used instead;
#   * matching the dtype avoids a dtype mismatch/promotion in `torch.cat`
#     when the embeddings are not float32.
device = encoder.bert.embeddings.word_embeddings.weight.device
zeros = torch.zeros(
    (tokens_to_add, args.d_model),
    dtype=encoder.bert.embeddings.word_embeddings.weight.dtype,
    device=device)

encoder.bert.embeddings.word_embeddings.weight.data = torch.cat(
    (encoder.bert.embeddings.word_embeddings.weight.data, zeros))

# Explicit decoder settings, gathered in one mapping so they can be read
# (and logged) as a unit before the module is built.
decoder_kwargs = dict(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    num_attn_heads=args.num_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    max_seq_length=args.max_seq_length,
    embedding_dropout=args.embedding_dropout,
    learn_positional_encodings=True,
    hidden_act="gelu",
)

# `dec_first_sublayer_params` is unpacked at the call site (not merged into
# the dict above) so that a key duplicated between the two still raises
# TypeError instead of being silently overridden.
decoder = nemo_nlp.TransformerDecoderNM(
    **decoder_kwargs, **dec_first_sublayer_params)

# Restore the decoder from `args.restore_from`
# (presumably a checkpoint path -- confirm against the argument parser).
decoder.restore_from(args.restore_from, local_rank=args.local_rank)

# Single-layer token classifier over `vocab_size` classes with
# log-softmax output enabled. The first positional argument is
# `args.d_model` (presumably the input feature size -- confirm against
# the TokenClassifier signature).
t_log_softmax = nemo_nlp.TokenClassifier(
    args.d_model,
    num_classes=vocab_size,
    num_layers=1,
    log_softmax=True,
)

beam_translator = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
Example #2
0
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_sequence_length,
    share_all_layers=args.share_encoder_layers)
# Decoder configuration, taken from the command-line arguments plus the
# shared `neural_factory` and `vocab_size`.
decoder_kwargs = dict(
    factory=neural_factory,
    d_embedding=args.d_embedding,
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_sequence_length,
    share_all_layers=args.share_decoder_layers,
)
decoder = nemo_nlp.TransformerDecoderNM(**decoder_kwargs)
# Log-softmax output module, built with the same factory, vocabulary size
# and model/embedding dimensions that are passed to the decoder.
log_softmax = nemo_nlp.TransformerLogSoftmaxNM(
    factory=neural_factory,
    vocab_size=vocab_size,
    d_model=args.d_model,
    d_embedding=args.d_embedding,
)
beam_search = nemo_nlp.BeamSearchTranslatorNM(
    factory=neural_factory,
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=args.max_sequence_length,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=src_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length)

# Decoder configuration; the vocabulary size is the target-side one
# (`tgt_vocab_size`), everything else comes from the parsed arguments.
decoder_kwargs = dict(
    d_model=args.d_model,
    d_inner=args.d_inner,
    num_layers=args.num_layers,
    embedding_dropout=args.embedding_dropout,
    num_attn_heads=args.num_attn_heads,
    ffn_dropout=args.ffn_dropout,
    vocab_size=tgt_vocab_size,
    attn_score_dropout=args.attn_score_dropout,
    attn_layer_dropout=args.attn_layer_dropout,
    max_seq_length=args.max_seq_length,
)
decoder = nemo_nlp.TransformerDecoderNM(**decoder_kwargs)

# Single-layer token classifier with log-softmax output enabled.
# NOTE(review): `num_classes` is read from `tgt_tokenizer.vocab_size`, while
# the decoder is built with `tgt_vocab_size` -- confirm the two are meant
# to differ (or are equal) where they are defined.
log_softmax = nemo_nlp.TokenClassifier(
    args.d_model,
    num_classes=tgt_tokenizer.vocab_size,
    num_layers=1,
    log_softmax=True,
)

beam_search = nemo_nlp.BeamSearchTranslatorNM(
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=args.max_seq_length,
Example #4
0
        local_rank=args.local_rank)

    # Append `tokens_to_add` all-zero rows to the word-embedding matrix.
    # The padding is created with the device *and* dtype of the existing
    # weights:
    #   * `Tensor.get_device()` returns -1 for CPU tensors, which is not a
    #     valid argument for `.to(device=...)`, so `Tensor.device` is used;
    #   * matching the dtype avoids a dtype mismatch/promotion in
    #     `torch.cat` when the embeddings are not float32.
    device = encoder.bert.embeddings.word_embeddings.weight.device
    zeros = torch.zeros(
        (tokens_to_add, args.d_model),
        dtype=encoder.bert.embeddings.word_embeddings.weight.dtype,
        device=device)

    encoder.bert.embeddings.word_embeddings.weight.data = torch.cat(
        (encoder.bert.embeddings.word_embeddings.weight.data, zeros))

    # Decoder configuration for this branch. Note that `max_seq_length` is
    # read from the bare name `max_sequence_length`, not from `args`.
    decoder_kwargs = dict(
        factory=neural_factory,
        d_embedding=args.d_embedding,
        d_model=args.d_model,
        d_inner=args.d_inner,
        num_layers=args.num_layers,
        num_attn_heads=args.num_heads,
        ffn_dropout=args.ffn_dropout,
        vocab_size=vocab_size,
        attn_score_dropout=args.attn_score_dropout,
        attn_layer_dropout=args.attn_layer_dropout,
        max_seq_length=max_sequence_length,
        embedding_dropout=args.embedding_dropout,
        share_all_layers=args.share_decoder_layers,
        learn_positional_encodings=True,
        hidden_act="gelu",
    )
    decoder = nemo_nlp.TransformerDecoderNM(**decoder_kwargs)

    # Restoring the decoder is opt-in: it only happens when
    # `args.restore_decoder` is truthy, using `args.decoder_restore_from`.
    if args.restore_decoder:
        decoder.restore_from(
            args.decoder_restore_from, local_rank=args.local_rank)

elif args.encoder == "nemo":
    encoder = nemo_nlp.TransformerEncoderNM(
        factory=neural_factory,