Example #1
    def layer_forward(self, x, hx, cell, batch_sizes, reverse=False):
        h, c = hx
        init_h, init_c = h, c
        output, seq_len = [], len(x)
        steps = reversed(range(seq_len)) if reverse else range(seq_len)
        if self.training:
            hid_mask = SharedDropout.get_mask(h, self.dropout)

        for t in steps:
            last_batch_size, batch_size = len(h), batch_sizes[t]
            if last_batch_size < batch_size:
                # batch grows at this step (reverse pass): append fresh initial states
                h = torch.cat((h, init_h[last_batch_size:batch_size]))
                c = torch.cat((c, init_c[last_batch_size:batch_size]))
            else:
                # batch shrinks (forward pass): drop sequences that have already ended
                h = h[:batch_size]
                c = c[:batch_size]
            h, c = cell(input=x[t], hx=(h, c))
            output.append(h)
            if self.training:
                h = h * hid_mask[:batch_size]
        if reverse:
            output.reverse()
        output = torch.cat(output)

        return output
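This method draws a single dropout mask with SharedDropout.get_mask before the time loop and reapplies it at every step. The helper itself is not part of the example; below is a minimal sketch, assuming the usual inverted-dropout scaling (the actual implementation may differ):

import torch
import torch.nn as nn

class SharedDropout(nn.Module):
    """Dropout that draws one mask and shares it across a whole sequence (sketch)."""

    def __init__(self, p=0.5):
        super().__init__()
        self.p = p

    @staticmethod
    def get_mask(x, p):
        # Bernoulli keep-mask with inverted-dropout scaling; the caller
        # reapplies the same mask at every time step.
        return x.new_empty(x.shape).bernoulli_(1 - p) / (1 - p)

    def forward(self, x):
        # x: (batch, seq_len, hidden); one mask per sequence, broadcast over time
        if not self.training or self.p == 0:
            return x
        return x * self.get_mask(x[:, 0], self.p).unsqueeze(1)

Note that the layer_forward above multiplies the recurrent state by get_mask(...) directly rather than calling forward, which is why the mask is drawn once outside the time loop.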
Example #2
    def forward(self, x, hx=None):
        x, batch_sizes = x
        batch_size = batch_sizes[0]

        if hx is None:
            init = x.new_zeros(batch_size, self.hidden_size)
            hx = (init, init)

        for layer in range(self.num_layers):
            if self.training:
                # draw one dropout mask per sequence and reuse it at every time step
                mask = SharedDropout.get_mask(x[:batch_size], self.dropout)
                mask = torch.cat([mask[:size] for size in batch_sizes])
                x *= mask
            x = torch.split(x, batch_sizes.tolist())
            f_output = self.layer_forward(x=x,
                                          hx=hx,
                                          cell=self.f_cells[layer],
                                          batch_sizes=batch_sizes,
                                          reverse=False)
            b_output = self.layer_forward(x=x,
                                          hx=hx,
                                          cell=self.b_cells[layer],
                                          batch_sizes=batch_sizes,
                                          reverse=True)
            x = torch.cat([f_output, b_output], -1)
        x = PackedSequence(x, batch_sizes)

        return x
Example #3
    def layer_forward(self, x, hx, cell, batch_sizes, reverse=False):
        hx_0 = hx_i = hx
        hx_n, output = [], []
        steps = reversed(range(len(x))) if reverse else range(len(x))
        if self.training:
            hid_mask = SharedDropout.get_mask(hx_0[0], self.dropout)

        for t in steps:
            last_batch_size, batch_size = len(hx_i[0]), batch_sizes[t]
            if last_batch_size < batch_size:
                # batch grows (reverse pass): pad the states with fresh initial values
                hx_i = [
                    torch.cat((h, ih[last_batch_size:batch_size]))
                    for h, ih in zip(hx_i, hx_0)
                ]
            else:
                # batch shrinks: stash the final states of sequences that have ended
                hx_n.append([h[batch_size:] for h in hx_i])
                hx_i = [h[:batch_size] for h in hx_i]
            hx_i = [h for h in cell(x[t], hx_i)]
            output.append(hx_i[0])
            if self.training:
                hx_i[0] = hx_i[0] * hid_mask[:batch_size]
        if reverse:
            hx_n = hx_i
            output.reverse()
        else:
            hx_n.append(hx_i)
            hx_n = [torch.cat(h) for h in zip(*reversed(hx_n))]
        output = torch.cat(output)

        return output, hx_n
Example #4
    def __init__(self, params):
        super(BiaffineParser, self).__init__()

        self.params = params
        # self.word_dropout = nn.Dropout(p=params['word_dropout'])
        # self.word_dropout_p = params['word_dropout']

        # BERT
        # self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.bert = BertModel.from_pretrained('bert-base-cased')

        self.bert_dropout = SharedDropout(p=params['bert_dropout'])

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_arc'],
                             dropout=params['mlp_dropout'])
        self.mlp_arc_d = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_arc'],
                             dropout=params['mlp_dropout'])
        self.mlp_rel_h = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_rel'],
                             dropout=params['mlp_dropout'])
        self.mlp_rel_d = MLP(n_in=params['n_bert_hidden'],
                             n_hidden=params['n_mlp_rel'],
                             dropout=params['mlp_dropout'])

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=params['n_mlp_arc'],
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=params['n_mlp_rel'],
                                 n_out=params['n_rels'],
                                 bias_x=True,
                                 bias_y=True)
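The constructor only wires the layers; the matching forward pass is not part of the example. The following sketch shows how a parser of this shape typically scores arcs and relations with these modules (the method and argument names are illustrative, not taken from the source):

    def forward(self, subwords, attention_mask=None):
        # contextual token representations from BERT, then shared dropout
        x = self.bert(subwords, attention_mask=attention_mask)[0]
        x = self.bert_dropout(x)

        # head/dependent views for arcs and for relations
        arc_h, arc_d = self.mlp_arc_h(x), self.mlp_arc_d(x)
        rel_h, rel_d = self.mlp_rel_h(x), self.mlp_rel_d(x)

        # biaffine scores: s_arc is [batch, seq, seq],
        # s_rel is [batch, seq, seq, n_rels] after the permute
        s_arc = self.arc_attn(arc_d, arc_h)
        s_rel = self.rel_attn(rel_d, rel_h).permute(0, 2, 3, 1)

        return s_arc, s_rel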
Example #5
    def __init__(self, n_in, n_hidden, dropout=0):
        super(MLP, self).__init__()

        self.linear = nn.Linear(n_in, n_hidden)
        self.activation = nn.LeakyReLU(negative_slope=0.1)
        self.dropout = SharedDropout(p=dropout)

        self.reset_parameters()
Example #6
    def __init__(self,
                 n_in,
                 n_hidden,
                 activation=nn.LeakyReLU(0.1),
                 dropout=0):
        super(MLP, self).__init__()

        self.linear = nn.Linear(n_in, n_hidden)
        self.activation = activation
        self.dropout = SharedDropout(dropout)

        self.reset_parameters()
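Examples #5 and #6 show only the MLP constructor; reset_parameters and forward are not included. A plausible completion, under the assumption of orthogonal weight initialisation (common in these parsers, but not confirmed by the excerpt):

    def reset_parameters(self):
        # assumed initialisation: orthogonal weight, zero bias
        nn.init.orthogonal_(self.linear.weight)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        x = self.linear(x)
        x = self.activation(x)
        x = self.dropout(x)
        return x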
Example #7
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        # the embedding layer
        self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                       embedding_dim=args.n_embed)
        if args.feat == 'char':
            self.feat_embed = CHAR_LSTM(n_chars=args.n_feats,
                                        n_embed=args.n_char_embed,
                                        n_out=args.n_embed)
        elif args.feat == 'bert':
            self.feat_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_embed)
        else:
            self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                           embedding_dim=args.n_embed)
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=args.n_embed * 2,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.pad_index = args.pad_index
        self.unk_index = args.unk_index
Example #8
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        # the embedding layer
        if args.bert is False:
            self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                           embedding_dim=args.word_embed)
            if args.freeze_word_emb:
                self.word_embed.weight.requires_grad = False
        else:
            self.word_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.word_embed)

        self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                       embedding_dim=args.n_embed)

        if args.freeze_feat_emb:
            self.feat_embed.weight.requires_grad = False

        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=args.word_embed + args.n_embed,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)

        self.pad_index = args.pad_index
        self.unk_index = args.unk_index

        self.multinomial = nn.Parameter(torch.ones(args.n_feats, args.n_feats))
Example #9
    def __init__(self, vocab, n_embed, n_char_embed, n_char_out, n_lstm_hidden,
                 n_lstm_layers, n_mlp_arc, n_mlp_lab, n_labels, drop):
        super(BiAffineParser, self).__init__()

        self.vocab = vocab
        # the embedding layer
        self.embed = nn.Embedding(vocab.n_train_words, n_embed)
        self.pretrained = nn.Embedding.from_pretrained(vocab.embeddings)
        # the char-lstm layer
        self.char_lstm = CharLSTM(n_char=vocab.n_chars,
                                  n_embed=n_char_embed,
                                  n_out=n_char_out)
        self.embed_drop = IndependentDropout(p=drop)

        # the word-lstm layer
        self.lstm = ParserLSTM(input_size=n_embed + n_char_out,
                               hidden_size=n_lstm_hidden,
                               num_layers=n_lstm_layers,
                               batch_first=True,
                               dropout=drop,
                               bidirectional=True)
        self.lstm_drop = SharedDropout(p=drop)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_arc,
                             drop=drop)
        self.mlp_arc_d = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_arc,
                             drop=drop)
        self.mlp_lab_h = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_lab,
                             drop=drop)
        self.mlp_lab_d = MLP(n_in=n_lstm_hidden * 2,
                             n_hidden=n_mlp_lab,
                             drop=drop)

        # the BiAffine layers
        self.arc_attn = BiAffine(n_in=n_mlp_arc, bias_x=True, bias_y=False)
        self.lab_attn = BiAffine(n_in=n_mlp_lab,
                                 n_out=n_labels,
                                 bias_x=True,
                                 bias_y=True)

        self.reset_parameters()
Example #10
    def forward(self, sequence, hx=None):
        x, batch_sizes = sequence.data, sequence.batch_sizes.tolist()
        batch_size = batch_sizes[0]
        h_n, c_n = [], []

        if hx is None:
            ih = x.new_zeros(self.num_layers * 2, batch_size, self.hidden_size)
            h, c = ih, ih
        else:
            h, c = self.permute_hidden(hx, sequence.sorted_indices)
        h = h.view(self.num_layers, 2, batch_size, self.hidden_size)
        c = c.view(self.num_layers, 2, batch_size, self.hidden_size)

        for i in range(self.num_layers):
            x = torch.split(x, batch_sizes)
            if self.training:
                mask = SharedDropout.get_mask(x[0], self.dropout)
                x = [i * mask[:len(i)] for i in x]
            x_f, (h_f, c_f) = self.layer_forward(x=x,
                                                 hx=(h[i, 0], c[i, 0]),
                                                 cell=self.f_cells[i],
                                                 batch_sizes=batch_sizes)
            x_b, (h_b, c_b) = self.layer_forward(x=x,
                                                 hx=(h[i, 1], c[i, 1]),
                                                 cell=self.b_cells[i],
                                                 batch_sizes=batch_sizes,
                                                 reverse=True)
            x = torch.cat((x_f, x_b), -1)
            h_n.append(torch.stack((h_f, h_b)))
            c_n.append(torch.stack((c_f, c_b)))
        x = PackedSequence(x, sequence.batch_sizes, sequence.sorted_indices,
                           sequence.unsorted_indices)
        hx = torch.cat(h_n, 0), torch.cat(c_n, 0)
        hx = self.permute_hidden(hx, sequence.unsorted_indices)

        return x, hx
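A minimal usage sketch for this forward; lstm stands for an instance of the BiLSTM class shown here (its constructor is assumed, not shown), and PyTorch's standard packing helpers produce the PackedSequence input:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# toy batch: three sequences of lengths 5, 3 and 2 with 100-dim features
inputs = torch.randn(3, 5, 100)
lengths = torch.tensor([5, 3, 2])

packed = pack_padded_sequence(inputs, lengths, batch_first=True)
output, (h_n, c_n) = lstm(packed)           # lstm: BiLSTM instance (assumed)
output, _ = pad_packed_sequence(output, batch_first=True)
# output: (3, 5, 2 * hidden_size); h_n and c_n: (2 * num_layers, 3, hidden_size)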
Example #11
    def __init__(self, args, mask_token_id=0):
        super().__init__()

        self.args = args
        if args.n_embed:
            # the embedding layer
            self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                           embedding_dim=args.n_embed)
            self.unk_index = args.unk_index
        else:
            self.word_embed = None
        if args.feat == 'char':
            self.feat_embed = CharLSTM(n_chars=args.n_feats,
                                       n_embed=args.n_char_embed,
                                       n_out=args.n_feat_embed,
                                       pad_index=args.feat_pad_index)
            self.pad_index = args.pad_index
        elif args.feat == 'bert':
            self.feat_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_feat_embed,
                                            requires_grad=args.bert_fine_tune,
                                            mask_token_id=mask_token_id,
                                            token_dropout=args.token_dropout,
                                            mix_dropout=args.mix_dropout,
                                            use_hidden_states=args.use_hidden_states,
                                            use_attentions=args.use_attentions,
                                            attention_layer=args.attention_layer)
            #self.args.n_mlp_arc = self.feat_embed.bert.config.max_position_embeddings
            self.args.n_feat_embed = self.feat_embed.n_out # taken from the model
            self.args.n_bert_layers = self.feat_embed.n_layers # taken from the model
            self.pad_index = self.feat_embed.pad_index     # taken from the model
            self.args.pad_index = self.pad_index           # update
        else:
            self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                           embedding_dim=args.n_feat_embed)
            self.pad_index = args.pad_index
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        if args.n_lstm_layers:
            # the lstm layer
            self.lstm = BiLSTM(input_size=args.n_embed+args.n_feat_embed,
                               hidden_size=args.n_lstm_hidden,
                               num_layers=args.n_lstm_layers,
                               dropout=args.lstm_dropout)
            self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
            mlp_input_size = args.n_lstm_hidden*2
        else:
            self.lstm = None
            mlp_input_size = args.n_embed + args.n_feat_embed

        # the MLP layers
        self.mlp_arc_d = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_h = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=mlp_input_size,
                             n_out=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)

        # transformer attention
        if args.use_attentions:
            self.attn_mix = nn.Parameter(torch.randn(1))

        # # distance
        # self.args.distance = False # DEBUG
        # if self.args.distance:
        #     self.distance = DeepBiaffine(mlp_input_size, mlp_input_size, self.args.deep_biaff_hidden_dim, 1, dropout=args.mlp_dropout)

        self.criterion = nn.CrossEntropyLoss()
Example #12
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        self.pretrained = False
        # the embedding layer
        self.char_embed = nn.Embedding(num_embeddings=args.n_chars,
                                       embedding_dim=args.n_embed)
        n_lstm_input = args.n_embed
        if args.feat == 'bert':
            self.feat_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_feat_embed)
            n_lstm_input += args.n_feat_embed
        if self.args.feat in {'bigram', 'trigram'}:
            self.bigram_embed = nn.Embedding(num_embeddings=args.n_bigrams,
                                             embedding_dim=args.n_embed)
            n_lstm_input += args.n_embed
        if self.args.feat == 'trigram':
            self.trigram_embed = nn.Embedding(num_embeddings=args.n_trigrams,
                                              embedding_dim=args.n_embed)
            n_lstm_input += args.n_embed

        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the lstm layer
        self.lstm = BiLSTM(input_size=n_lstm_input,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_span_l = MLP(n_in=args.n_lstm_hidden * 2,
                              n_out=args.n_mlp_span,
                              dropout=args.mlp_dropout)
        self.mlp_span_r = MLP(n_in=args.n_lstm_hidden * 2,
                              n_out=args.n_mlp_span,
                              dropout=args.mlp_dropout)

        # the Biaffine layers
        self.span_attn = Biaffine(n_in=args.n_mlp_span,
                                  bias_x=True,
                                  bias_y=False)

        if args.link == 'mlp':
            # a representation that a fencepost is a split point
            self.mlp_span_s = MLP(n_in=args.n_lstm_hidden * 2,
                                  n_out=args.n_mlp_span,
                                  dropout=args.mlp_dropout)

            # scores for split points
            self.score_split = nn.Linear(args.n_mlp_span, 1)

        elif args.link == 'att':
            self.split_attn = ElementWiseBiaffine(n_in=args.n_lstm_hidden,
                                                  bias_x=True,
                                                  bias_y=False)

        self.pad_index = args.pad_index
        self.unk_index = args.unk_index
Example #13
    def __init__(self, args):
        super(Model, self).__init__()

        self.args = args
        # the embedding layer
        self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                       embedding_dim=args.n_embed)
        if args.use_char:
            self.char_embed = CHAR_LSTM(n_chars=args.n_char_feats,
                                        n_embed=args.n_char_embed,
                                        n_out=args.n_embed)
        if args.use_bert:
            self.bert_embed = BertEmbedding(model=args.bert_model,
                                            n_layers=args.n_bert_layers,
                                            n_out=args.n_embed)
        if args.use_pos:
            self.pos_embed = nn.Embedding(num_embeddings=args.n_pos_feats,
                                          embedding_dim=args.n_embed)
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # the word-lstm layer
        self.lstm = BiLSTM(input_size=args.n_embed *
                           (args.use_char + args.use_bert + args.use_pos + 1),
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # the MLP layers
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_hidden=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # the Biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.binary = args.binary
        # the Second Order Parts
        if self.args.use_second_order:
            self.use_sib = args.use_sib
            self.use_cop = args.use_cop
            self.use_gp = args.use_gp
            if args.use_sib:
                self.mlp_sib_h = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.mlp_sib_d = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.trilinear_sib = TrilinearScorer(args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     init_std=args.init_std,
                                                     rank=args.n_mlp_sec,
                                                     factorize=args.factorize)
            if args.use_cop:
                self.mlp_cop_h = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.mlp_cop_d = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.trilinear_cop = TrilinearScorer(args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     args.n_mlp_sec,
                                                     init_std=args.init_std,
                                                     rank=args.n_mlp_sec,
                                                     factorize=args.factorize)
            if args.use_gp:
                self.mlp_gp_h = MLP(n_in=args.n_lstm_hidden * 2,
                                    n_hidden=args.n_mlp_sec,
                                    dropout=args.mlp_dropout,
                                    identity=self.binary)
                self.mlp_gp_d = MLP(n_in=args.n_lstm_hidden * 2,
                                    n_hidden=args.n_mlp_sec,
                                    dropout=args.mlp_dropout,
                                    identity=self.binary)
                self.mlp_gp_hd = MLP(n_in=args.n_lstm_hidden * 2,
                                     n_hidden=args.n_mlp_sec,
                                     dropout=args.mlp_dropout,
                                     identity=self.binary)
                self.trilinear_gp = TrilinearScorer(args.n_mlp_sec,
                                                    args.n_mlp_sec,
                                                    args.n_mlp_sec,
                                                    init_std=args.init_std,
                                                    rank=args.n_mlp_sec,
                                                    factorize=args.factorize)

        self.pad_index = args.pad_index
        self.unk_index = args.unk_index