Example #1
    def __init__(self, params):
        super(BiaffineParser, self).__init__()

        self.params = params
        # self.word_dropout = nn.Dropout(p=params['word_dropout'])
        # self.word_dropout_p = params['word_dropout']

        # BERT
        # self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.bert = BertModel.from_pretrained('bert-base-cased')

        self.bert_dropout = SharedDropout(p=params['bert_dropout'])

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_arc'],
                             dropout=params['mlp_dropout'])
        self.mlp_arc_d = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_arc'],
                             dropout=params['mlp_dropout'])
        self.mlp_rel_h = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_rel'],
                             dropout=params['mlp_dropout'])
        self.mlp_rel_d = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_rel'],
                             dropout=params['mlp_dropout'])

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=params['n_mlp_arc'],
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=params['n_mlp_rel'],
                                 n_out=params['n_rels'],
                                 bias_x=True,
                                 bias_y=True)
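
All ten examples instantiate the same kind of `Biaffine` scoring layer. For orientation, here is a minimal sketch of such a layer, assuming only the interface visible above (`n_in`, `n_out`, `bias_x`, `bias_y`); the actual implementations in these repositories may differ in initialization and naming details.

import torch
import torch.nn as nn

class Biaffine(nn.Module):
    """Biaffine attention: s(x, y) = x^T W y, with optional bias features.

    Setting bias_x/bias_y appends a constant 1 to x/y, which folds the
    linear terms of the affine map into the single weight tensor.
    """

    def __init__(self, n_in, n_out=1, bias_x=True, bias_y=True):
        super().__init__()
        self.n_out = n_out
        self.bias_x = bias_x
        self.bias_y = bias_y
        self.weight = nn.Parameter(
            torch.zeros(n_out, n_in + bias_x, n_in + bias_y))

    def forward(self, x, y):
        # x, y: [batch_size, seq_len, n_in]
        if self.bias_x:
            x = torch.cat((x, torch.ones_like(x[..., :1])), -1)
        if self.bias_y:
            y = torch.cat((y, torch.ones_like(y[..., :1])), -1)
        # s: [batch_size, n_out, seq_len, seq_len]
        s = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y)
        # arc scorers use n_out=1 and expect a 3-dimensional result
        return s.squeeze(1) if self.n_out == 1 else s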
Example #2
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        # the embedding layer
        self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                       embedding_dim=args.n_embed)
        if args.feat == 'char':
            self.feat_embed = CHAR_LSTM(n_chars=args.n_feats,
                                        n_embed=args.n_char_embed,
                                        n_out=args.n_embed)
        elif args.feat == 'bert':
            self.feat_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_embed)
        else:
            self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                           embedding_dim=args.n_embed)
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=args.n_embed * 2,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.pad_index = args.pad_index
        self.unk_index = args.unk_index
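
Every variant with the four `mlp_arc_*`/`mlp_rel_*` layers wires them to the biaffine scorers the same way. A hypothetical forward pass over the BiLSTM output, sketched here for orientation; the real methods also handle embedding lookup and padding masks:

    def forward(self, x):
        # x: [batch_size, seq_len, n_lstm_hidden * 2] contextualized states
        arc_d = self.mlp_arc_d(x)  # dependent-role views of each token
        arc_h = self.mlp_arc_h(x)  # head-role views of each token
        rel_d = self.mlp_rel_d(x)
        rel_h = self.mlp_rel_h(x)

        # s_arc[b, i, j]: score of token j being the head of token i
        s_arc = self.arc_attn(arc_d, arc_h)
        # s_rel[b, i, j, r]: score of relation r on the arc j -> i
        s_rel = self.rel_attn(rel_d, rel_h).permute(0, 2, 3, 1)
        return s_arc, s_rel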
Example #3
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        # the embedding layer
        if args.bert is False:
            self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                           embedding_dim=args.word_embed)
            if args.freeze_word_emb:
                self.word_embed.weight.requires_grad = False
        else:
            self.word_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.word_embed)

        self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                       embedding_dim=args.n_embed)

        if args.freeze_feat_emb:
            self.feat_embed.weight.requires_grad = False

        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=args.word_embed + args.n_embed,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)

        self.pad_index = args.pad_index
        self.unk_index = args.unk_index

        self.multinomial = nn.Parameter(torch.ones(args.n_feats, args.n_feats))
Example #4
    def __init__(self,
                 base,
                 config,
                 label_size,
                 bertmodel,
                 baseline_parser=None):
        super(GraphBiaffineParser, self).__init__()

        self.config = config

        if base:
            self.bert = initialize_bertgraph(config.main_path + "/model" +
                                             "/model_" + config.modelname)
        else:
            self.bert = initialize_bertgraph(
                config.main_path + "/model" + "/model_" + config.modelname,
                config.layernorm_key, config.layernorm_value,
                config.input_labeled_graph, config.input_unlabeled_graph,
                label_size)

        if config.mix_layers:
            self.scalar_mix = ScalarMixWithDropout(
                13, do_layer_norm=False, dropout=config.layer_dropout)
        self.mlp_arc_h = MLP(n_in=self.bert.config.hidden_size,
                             n_hidden=config.n_mlp_arc,
                             dropout=config.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=self.bert.config.hidden_size,
                             n_hidden=config.n_mlp_arc,
                             dropout=config.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=self.bert.config.hidden_size,
                             n_hidden=config.n_mlp_rel,
                             dropout=config.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=self.bert.config.hidden_size,
                             n_hidden=config.n_mlp_rel,
                             dropout=config.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=config.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=config.n_mlp_rel,
                                 n_out=config.n_rels,
                                 bias_x=True,
                                 bias_y=True)

        self.pad_index = config.pad_index
        self.unk_index = config.unk_index
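
`ScalarMixWithDropout` blends the 13 hidden states of a base-sized BERT (the embedding layer plus 12 transformer layers) into a single representation. A minimal scalar-mix sketch without the layer-dropout option, assuming the AllenNLP-style interface:

import torch
import torch.nn as nn

class ScalarMix(nn.Module):
    def __init__(self, n_layers):
        super().__init__()
        # one learnable weight per layer, plus a global scale
        self.weights = nn.Parameter(torch.zeros(n_layers))
        self.gamma = nn.Parameter(torch.ones(1))

    def forward(self, layers):
        # layers: list of [batch_size, seq_len, hidden_size] tensors
        w = self.weights.softmax(-1)
        return self.gamma * sum(w_i * h for w_i, h in zip(w, layers))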
Example #5
    def __init__(self, vocab, n_embed, n_char_embed, n_char_out, n_lstm_hidden,
                 n_lstm_layers, n_mlp_arc, n_mlp_lab, n_labels, drop):
        super(BiAffineParser, self).__init__()

        self.vocab = vocab
        # the embedding layer
        self.embed = nn.Embedding(vocab.n_train_words, n_embed)
        self.pretrained = nn.Embedding.from_pretrained(vocab.embeddings)
        # the char-lstm layer
        self.char_lstm = CharLSTM(n_char=vocab.n_chars,
                                  n_embed=n_char_embed,
                                  n_out=n_char_out)
        self.embed_drop = IndependentDropout(p=drop)

        # the word-lstm layer
        self.lstm = ParserLSTM(input_size=n_embed + n_char_out,
                               hidden_size=n_lstm_hidden,
                               num_layers=n_lstm_layers,
                               batch_first=True,
                               dropout=drop,
                               bidirectional=True)
        self.lstm_drop = SharedDropout(p=drop)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_arc,
                             drop=drop)
        self.mlp_arc_d = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_arc,
                             drop=drop)
        self.mlp_lab_h = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_lab,
                             drop=drop)
        self.mlp_lab_d = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_lab,
                             drop=drop)

        # the BiAffine layers
        self.arc_attn = BiAffine(n_in=n_mlp_arc, bias_x=True, bias_y=False)
        self.lab_attn = BiAffine(n_in=n_mlp_lab,
                                 n_out=n_labels,
                                 bias_x=True,
                                 bias_y=True)

        self.reset_parameters()
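
This constructor keeps two word tables: a frozen `pretrained` one and a trainable `embed` one. A plausible `reset_parameters`, assuming the two tables are summed in the forward pass (the convention popularized by Dozat and Manning's biaffine parser):

    def reset_parameters(self):
        # start the trainable table at zero so that, at initialization,
        # embed(word) + pretrained(word) equals the pretrained vector alone
        nn.init.zeros_(self.embed.weight)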
Example #6
    def __init__(self, config, embeddings):
        super(BiaffineParser, self).__init__()

        self.config = config
        # the embedding layer
        self.pretrained = nn.Embedding.from_pretrained(embeddings)
        self.embed = nn.Embedding(num_embeddings=config.n_words,
                                  embedding_dim=config.n_embed)
        self.tag_embed = nn.Embedding(num_embeddings=config.n_tags,
                                      embedding_dim=config.n_tag_embed)
        self.embed_dropout = IndependentDropout(p=config.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=config.n_embed + config.n_tag_embed,
                           hidden_size=config.n_lstm_hidden,
                           num_layers=config.n_lstm_layers,
                           dropout=config.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=config.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_arc,
                             dropout=config.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_arc,
                             dropout=config.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_rel,
                             dropout=config.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_rel,
                             dropout=config.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=config.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=config.n_mlp_rel,
                                 n_out=config.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.pad_index = config.pad_index
        self.unk_index = config.unk_index

        self.reset_parameters()
Example #7
    def __init__(self, args, mask_token_id=0):
        super().__init__()

        self.args = args
        if args.n_embed:
            # the embedding layer
            self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                           embedding_dim=args.n_embed)
            self.unk_index = args.unk_index
        else:
            self.word_embed = None
        if args.feat == 'char':
            self.feat_embed = CharLSTM(n_chars=args.n_feats,
                                       n_embed=args.n_char_embed,
                                       n_out=args.n_feat_embed,
                                       pad_index=args.feat_pad_index)
            self.pad_index = args.pad_index
        elif args.feat == 'bert':
            self.feat_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_feat_embed,
                                            requires_grad=args.bert_fine_tune,
                                            mask_token_id=mask_token_id,
                                            token_dropout=args.token_dropout,
                                            mix_dropout=args.mix_dropout,
                                            use_hidden_states=args.use_hidden_states,
                                            use_attentions=args.use_attentions,
                                            attention_layer=args.attention_layer)
            #self.args.n_mlp_arc = self.feat_embed.bert.config.max_position_embeddings
            self.args.n_feat_embed = self.feat_embed.n_out # taken from the model
            self.args.n_bert_layers = self.feat_embed.n_layers # taken from the model
            self.pad_index = self.feat_embed.pad_index     # taken from the model
            self.args.pad_index = self.pad_index           # update
        else:
            self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                           embedding_dim=args.n_feat_embed)
            self.pad_index = args.pad_index
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        if args.n_lstm_layers:
            # the lstm layer
            self.lstm = BiLSTM(input_size=args.n_embed + args.n_feat_embed,
                               hidden_size=args.n_lstm_hidden,
                               num_layers=args.n_lstm_layers,
                               dropout=args.lstm_dropout)
            self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
            mlp_input_size = args.n_lstm_hidden * 2
        else:
            self.lstm = None
            mlp_input_size = args.n_embed + args.n_feat_embed

        # the MLP layers
        self.mlp_arc_d = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_h = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)

        # transformer attention
        if args.use_attentions:
            self.attn_mix = nn.Parameter(torch.randn(1))

        # # distance
        # self.args.distance = False # DEBUG
        # if self.args.distance:
        #     self.distance = DeepBiaffine(mlp_input_size, mlp_input_size, self.args.deep_biaff_hidden_dim, 1, dropout=args.mlp_dropout)

        self.criterion = nn.CrossEntropyLoss()
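
The shared cross-entropy criterion scores both arcs and relations. A hypothetical `loss` method consistent with the layers above, assuming gold `arcs`/`rels` index tensors and a boolean padding `mask`:

    def loss(self, s_arc, s_rel, arcs, rels, mask):
        # keep only non-padding positions
        s_arc, arcs = s_arc[mask], arcs[mask]
        s_rel, rels = s_rel[mask], rels[mask]
        # score relations against the gold head of each token
        s_rel = s_rel[torch.arange(len(arcs)), arcs]
        return self.criterion(s_arc, arcs) + self.criterion(s_rel, rels)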
Example #8
    def __init__(self, config, embed):
        super(BiaffineParser, self).__init__()

        self.config = config
        # the embedding layer
        self.pretrained = nn.Embedding.from_pretrained(embed)
        self.word_embed = nn.Embedding(num_embeddings=config.n_words,
                                       embedding_dim=config.n_embed)
        # the char-lstm layer
        self.char_lstm = CHAR_LSTM(n_chars=config.n_chars,
                                   n_embed=config.n_char_embed,
                                   n_out=config.n_embed)
        self.embed_dropout = IndependentDropout(p=config.embed_dropout)

        self.tag_lstm = BiLSTM(input_size=config.n_embed * 2,
                               hidden_size=config.n_lstm_hidden,
                               num_layers=config.n_lstm_layers,
                               dropout=config.lstm_dropout)
        self.dep_lstm = BiLSTM(input_size=config.n_embed * 2 +
                               config.n_mlp_arc,
                               hidden_size=config.n_lstm_hidden,
                               num_layers=config.n_lstm_layers,
                               dropout=config.lstm_dropout)
        if config.weight:
            self.tag_mix = ScalarMix(n_layers=config.n_lstm_layers)
            self.dep_mix = ScalarMix(n_layers=config.n_lstm_layers)
        self.lstm_dropout = SharedDropout(p=config.lstm_dropout)

        # the MLP layers
        self.mlp_tag = MLP(n_in=config.n_lstm_hidden * 2,
                           n_hidden=config.n_mlp_arc,
                           dropout=0.5)
        self.mlp_dep = MLP(n_in=config.n_lstm_hidden * 2,
                           n_hidden=config.n_mlp_arc,
                           dropout=config.mlp_dropout)
        self.mlp_arc_h = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_arc,
                             dropout=config.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_arc,
                             dropout=config.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_rel,
                             dropout=config.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=config.n_lstm_hidden * 2,
                             n_hidden=config.n_mlp_rel,
                             dropout=config.mlp_dropout)

        self.ffn_pos_tag = nn.Linear(config.n_mlp_arc, config.n_pos_tags)
        self.ffn_dep_tag = nn.Linear(config.n_mlp_arc, config.n_dep_tags)
        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=config.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=config.n_mlp_rel,
                                 n_out=config.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.weight = config.weight
        self.pad_index = config.pad_index
        self.unk_index = config.unk_index
        self.criterion = nn.CrossEntropyLoss()

        self.reset_parameters()
Example #9
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        self.pretrained = False
        # the embedding layer
        self.char_embed = nn.Embedding(num_embeddings=args.n_chars,
                                       embedding_dim=args.n_embed)
        n_lstm_input = args.n_embed
        if args.feat == 'bert':
            self.feat_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_feat_embed)
            n_lstm_input += args.n_feat_embed
        if self.args.feat in {'bigram', 'trigram'}:
            self.bigram_embed = nn.Embedding(num_embeddings=args.n_bigrams,
                                             embedding_dim=args.n_embed)
            n_lstm_input += args.n_embed
        if self.args.feat == 'trigram':
            self.trigram_embed = nn.Embedding(num_embeddings=args.n_trigrams,
                                              embedding_dim=args.n_embed)
            n_lstm_input += args.n_embed

        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the lstm layer
        self.lstm = BiLSTM(input_size=n_lstm_input,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_span_l = MLP(n_in=args.n_lstm_hidden * 2,
                              n_out=args.n_mlp_span,
                              dropout=args.mlp_dropout)
        self.mlp_span_r = MLP(n_in=args.n_lstm_hidden * 2,
                              n_out=args.n_mlp_span,
                              dropout=args.mlp_dropout)

        # the Biaffine layers
        self.span_attn = Biaffine(n_in=args.n_mlp_span,
                                  bias_x=True,
                                  bias_y=False)

        if args.link == 'mlp':
            # a representation that a fencepost is a split point
            self.mlp_span_s = MLP(n_in=args.n_lstm_hidden * 2,
                                  n_out=args.n_mlp_span,
                                  dropout=args.mlp_dropout)

            # scores for split points
            self.score_split = nn.Linear(args.n_mlp_span, 1)

        elif args.link == 'att':
            self.split_attn = ElementWiseBiaffine(n_in=args.n_lstm_hidden,
                                                  bias_x=True,
                                                  bias_y=False)

        self.pad_index = args.pad_index
        self.unk_index = args.unk_index
Example #10
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        # the embedding layer
        self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                       embedding_dim=args.n_embed)
        if args.use_char:
            self.char_embed = CHAR_LSTM(n_chars=args.n_char_feats,
                                        n_embed=args.n_char_embed,
                                        n_out=args.n_embed)
        if args.use_bert:
            self.bert_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_embed)
        if args.use_pos:
            self.pos_embed = nn.Embedding(num_embeddings=args.n_pos_feats,
                                          embedding_dim=args.n_embed)
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=args.n_embed *
                           (args.use_char + args.use_bert + args.use_pos + 1),
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.binary = args.binary
        # the Second Order Parts
        if self.args.use_second_order:
            self.use_sib = args.use_sib
            self.use_cop = args.use_cop
            self.use_gp = args.use_gp
            if args.use_sib:
                self.mlp_sib_h = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.mlp_sib_d = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.trilinear_sib = TrilinearScorer(args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     init_std=args.init_std,
                                                     rank=args.n_mlp_sec,
                                                     factorize=args.factorize)
            if args.use_cop:
                self.mlp_cop_h = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.mlp_cop_d = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.trilinear_cop = TrilinearScorer(args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     init_std=args.init_std,
                                                     rank=args.n_mlp_sec,
                                                     factorize=args.factorize)
            if args.use_gp:
                self.mlp_gp_h = MLP(n_in=args.n_lstm_hidden * 2,
                                    n_hidden=args.n_mlp_sec,
                                    dropout=args.mlp_dropout,
                                    identity=self.binary)
                self.mlp_gp_d = MLP(n_in=args.n_lstm_hidden * 2,
                                    n_hidden=args.n_mlp_sec,
                                    dropout=args.mlp_dropout,
                                    identity=self.binary)
                self.mlp_gp_hd = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.trilinear_gp = TrilinearScorer(args.n_mlp_sec,
                                                    args.n_mlp_sec,
                                                    args.n_mlp_sec,
                                                    init_std=args.init_std,
                                                    rank=args.n_mlp_sec,
                                                    factorize=args.factorize)

        self.pad_index = args.pad_index
        self.unk_index = args.unk_index
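
For completeness, a greedy decode over the first-order scores could look like the sketch below; it ignores the second-order potentials and tree well-formedness, which published parsers enforce with Eisner or MST decoding in place of the plain argmax:

    def decode(self, s_arc, s_rel):
        # pick the highest-scoring head for every token...
        arc_preds = s_arc.argmax(-1)
        # ...then the best relation on each predicted arc
        rel_preds = s_rel.argmax(-1).gather(-1, arc_preds.unsqueeze(-1))
        return arc_preds, rel_preds.squeeze(-1)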