Example #1
    def layer_forward(self, x, hx, cell, batch_sizes, reverse=False):
        """single bilstm layer forward network"""
        hx_0 = hx_i = hx
        hx_n, output = [], []
        steps = reversed(range(len(x))) if reverse else range(len(x))
        if self.training and self.dropout > 0:
            hid_mask = SharedDropout.get_mask(hx_0[0], self.dropout)

        for t in steps:
            last_bs, bs = len(hx_i[0]), batch_sizes[t]
            if last_bs < bs:
                # Batch grows (reverse direction): extend the running states
                # with the initial states of the sequences that become active.
                hx_i = [
                    layers.concat((h, ih[last_bs:bs]))
                    for h, ih in zip(hx_i, hx_0)
                ]
            else:
                if bs < hx_i[0].shape[0]:
                    # Batch shrinks (forward direction): stash the final states
                    # of the sequences that just finished.
                    hx_n.append([hx_i[0][bs:], hx_i[1][bs:]])
                hx_i = [h[:bs] for h in hx_i]
            # One LSTM cell step over the tokens that are active at time step t.
            hx_i = [h for h in cell(x[t], *hx_i)]
            output.append(hx_i[0])
            if self.training and self.dropout > 0:
                # Reuse the same dropout mask on the recurrent state at every step.
                hx_i[0] = hx_i[0] * hid_mask[:bs]
        if reverse:
            hx_n = hx_i
            output.reverse()
        else:
            hx_n.append(hx_i)
            hx_n = [layers.concat(h) for h in zip(*reversed(hx_n))]
        output = layers.concat(output)

        return output, hx_n
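
The batch_sizes argument follows the packed-sequence convention: sequences are sorted by length in descending order, and batch_sizes[t] is how many of them are still active at time step t, which is what drives the shrink/grow logic above. A small framework-free sketch of how such a vector relates to sequence lengths (the helper name is illustrative, not part of this code):

    def packed_batch_sizes(lengths):
        """Given per-sequence lengths sorted in descending order,
        return how many sequences are still active at each time step."""
        assert all(a >= b for a, b in zip(lengths, lengths[1:])), \
            "lengths must be sorted in descending order"
        max_len = lengths[0] if lengths else 0
        return [sum(1 for n in lengths if n > t) for t in range(max_len)]

    # Lengths [5, 3, 2]: all three sequences provide a token at steps 0-1,
    # two at step 2, and only the longest one at steps 3-4.
    print(packed_batch_sizes([5, 3, 2]))  # [3, 3, 2, 1, 1]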
Example #2
    def __init__(self, args):
        super(LSTMEmbed, self).__init__(args)

        # Initialize the feature embedding; feat can be either char or pos
        if args.feat == "char":
            self.feat_embed = CharLSTM(
                n_chars=args.n_feats,
                n_embed=args.n_char_embed,
                n_out=args.n_lstm_feat_embed,
                pad_index=args.feat_pad_index,
            )
            feat_embed_size = args.n_lstm_feat_embed

        else:
            self.feat_embed = dygraph.Embedding(size=(args.n_feats,
                                                      args.n_feat_embed))
            feat_embed_size = args.n_feat_embed

        # lstm layer
        self.lstm = BiLSTM(
            input_size=args.n_embed + feat_embed_size,
            hidden_size=args.n_lstm_hidden,
            num_layers=args.n_lstm_layers,
            dropout=args.lstm_dropout,
        )
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
        self.mlp_input_size = args.n_lstm_hidden * 2
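
For orientation, a minimal construction sketch. The import path and every hyperparameter value below are illustrative assumptions, not taken from this code, and the parent class is assumed to need nothing beyond the same namespace:

    from argparse import Namespace

    import paddle.fluid as fluid

    from parser.nets import LSTMEmbed  # hypothetical import path; adjust to this repository

    # Illustrative hyperparameters; real values come from the parser's config.
    args = Namespace(
        feat="char",            # feature type: "char" or "pos"
        n_feats=500,            # char (or POS-tag) vocabulary size
        n_char_embed=50,
        n_lstm_feat_embed=100,
        n_feat_embed=100,
        feat_pad_index=0,
        n_embed=300,            # word embedding size
        n_lstm_hidden=400,
        n_lstm_layers=3,
        lstm_dropout=0.33,
    )

    with fluid.dygraph.guard():
        embed = LSTMEmbed(args)
        # BiLSTM input size: n_embed + feature embedding size = 300 + 100
        # MLP input size: n_lstm_hidden * 2 (forward and backward states concatenated)
        print(embed.mlp_input_size)  # 800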
Example #3
    def __init__(self, n_in, n_out, dropout=0):
        super(MLP, self).__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.linear = dygraph.Linear(
            n_in,
            n_out,
            param_attr=initializer.Xavier(uniform=False),
            bias_attr=None,
        )
        self.dropout = SharedDropout(p=dropout)
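
A quick shape check, assuming the module can be imported as below (the path and sizes are illustrative) and that its forward, which is not shown here, applies the linear projection followed by SharedDropout:

    import numpy as np
    import paddle.fluid as fluid

    from parser.nets import MLP  # hypothetical import path; adjust to this repository

    with fluid.dygraph.guard():
        mlp = MLP(n_in=800, n_out=500, dropout=0.33)  # illustrative sizes
        x = fluid.dygraph.to_variable(
            np.random.rand(2, 10, 800).astype("float32"))  # (batch, seq_len, n_in)
        y = mlp(x)      # projects the last dimension: n_in -> n_out
        print(y.shape)  # [2, 10, 500]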
Example #4
    def __init__(self, args):
        super(LSTMByWPEmbed, self).__init__(args)
        self.args = args
        self.init_ernie_model(args)
        # lstm layer
        self.lstm = BiLSTM(
            input_size=args.lstm_by_wp_embed_size,
            hidden_size=args.n_lstm_hidden,
            num_layers=args.n_lstm_layers,
            dropout=args.lstm_dropout,
        )
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
        self.mlp_input_size = args.n_lstm_hidden * 2
Example #5
    def forward(self, x, seq_mask, pad_index, hx=None):
        """Forward network"""
        x, batch_sizes, sorted_indices = self.pack_padded_sequence(
            x, seq_mask, pad_index)
        _, unsorted_indices = layers.argsort(sorted_indices)
        batch_size = batch_sizes[0]
        h_n, c_n = [], []

        if hx is None:
            ih = layers.zeros(shape=(self.num_layers * 2, batch_size,
                                     self.hidden_size),
                              dtype=x[0].dtype)
            h, c = ih, ih
        else:
            h, c = self.permute_hidden(hx, sorted_indices)
        # View the states per layer and per direction: (num_layers, 2, batch, hidden),
        # so h[i, 0] / h[i, 1] are the forward / backward states of layer i.
        h = layers.reshape(h, shape=(self.num_layers, 2, -1, self.hidden_size))
        c = layers.reshape(c, shape=(self.num_layers, 2, -1, self.hidden_size))

        for i in range(self.num_layers):
            # Split the packed data into one chunk per time step.
            x = layers.split(x, batch_sizes, dim=0)
            if self.training and self.dropout > 0:
                # Shared dropout between layers: one mask reused at every time step.
                mask = SharedDropout.get_mask(x[0], self.dropout)
                x = [j * mask[:len(j)] for j in x]
            x_f, (h_f, c_f) = self.layer_forward(x=x,
                                                 hx=(h[i, 0], c[i, 0]),
                                                 cell=self.f_cells[i],
                                                 batch_sizes=batch_sizes)
            x_b, (h_b, c_b) = self.layer_forward(x=x,
                                                 hx=(h[i, 1], c[i, 1]),
                                                 cell=self.b_cells[i],
                                                 batch_sizes=batch_sizes,
                                                 reverse=True)
            # Concatenate forward/backward outputs along the feature dimension
            # and stack the two directions' final states for this layer.
            x = layers.concat((x_f, x_b), axis=-1)
            h_n.append(layers.stack((h_f, h_b)))
            c_n.append(layers.stack((c_f, c_b)))
        x = self.pad_packed_sequence(x, batch_sizes, unsorted_indices)
        hx = layers.concat(h_n, axis=0), layers.concat(c_n, axis=0)
        hx = self.permute_hidden(hx, unsorted_indices)

        return x, hx
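
The hidden and cell states arrive in the flat (num_layers * 2, batch, hidden) layout and are viewed as (num_layers, 2, batch, hidden) so that h[i, 0] and h[i, 1] select the forward and backward state of layer i; the per-layer results are then stacked and concatenated back into the flat layout. A small NumPy sketch of that shape bookkeeping (sizes are illustrative):

    import numpy as np

    num_layers, batch, hidden = 3, 8, 400

    # Flat layout used at the interface: (num_layers * 2, batch, hidden).
    h = np.zeros((num_layers * 2, batch, hidden), dtype="float32")

    # Per-layer / per-direction view used inside forward().
    h_view = h.reshape(num_layers, 2, batch, hidden)
    h_fwd_layer0 = h_view[0, 0]  # forward initial state of layer 0
    h_bwd_layer0 = h_view[0, 1]  # backward initial state of layer 0

    # After the loop: one (2, batch, hidden) block per layer, stacked back.
    h_n = [np.stack((h_view[i, 0], h_view[i, 1])) for i in range(num_layers)]
    h_out = np.concatenate(h_n, axis=0)
    assert h_out.shape == (num_layers * 2, batch, hidden)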
Example #6
    def __init__(self, args, pretrained_embed=None):
        super(Model, self).__init__()
        self.args = args
        # the embedding layer
        self.word_embed = dygraph.Embedding(size=(args.n_words, args.n_embed))

        if args.pretrained_embed_shape is not None:
            if pretrained_embed is not None:
                pre_param_attrs = fluid.ParamAttr(
                    name="pretrained_emb",
                    initializer=initializer.NumpyArrayInitializer(
                        pretrained_embed),
                    trainable=True)
                self.pretrained = dygraph.Embedding(
                    size=args.pretrained_embed_shape,
                    param_attr=pre_param_attrs)
                # With pretrained vectors available, zero-initialize the
                # trainable word embedding.
                self.word_embed.weight = layers.create_parameter(
                    shape=(self.args.n_words, self.args.n_embed),
                    dtype='float32',
                    default_initializer=initializer.Constant(value=0.0))
            else:
                self.pretrained = dygraph.Embedding(
                    size=args.pretrained_embed_shape)
        # Initialize the feature embedding; feat can be either char or pos
        if args.feat == 'char':
            self.feat_embed = CharLSTM(n_chars=args.n_feats,
                                       n_embed=args.n_char_embed,
                                       n_out=args.n_feat_embed,
                                       pad_index=args.feat_pad_index)
        else:
            self.feat_embed = dygraph.Embedding(size=(args.n_feats,
                                                      args.n_feat_embed))
        self.embed_dropout = IndependentDropout(p=args.embed_dropout)

        # lstm layer
        self.lstm = BiLSTM(input_size=args.n_embed + args.n_feat_embed,
                           hidden_size=args.n_lstm_hidden,
                           num_layers=args.n_lstm_layers,
                           dropout=args.lstm_dropout)
        self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

        # mlp layer
        self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_out=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_out=args.n_mlp_arc,
                             dropout=args.mlp_dropout)
        self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                             n_out=args.n_mlp_rel,
                             dropout=args.mlp_dropout)
        self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                             n_out=args.n_mlp_rel,
                             dropout=args.mlp_dropout)

        # biaffine layers
        self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                                 bias_x=True,
                                 bias_y=False)
        self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                                 n_out=args.n_rels,
                                 bias_x=True,
                                 bias_y=True)
        self.pad_index = args.pad_index
        self.unk_index = args.unk_index
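
The two Biaffine layers score pairs of positions from the MLP outputs. The Biaffine class itself is not shown here, so the following NumPy sketch assumes the common biaffine-attention convention in which bias_x / bias_y append a constant-1 column to the corresponding input before the bilinear product; sizes and variable names are illustrative, and the head/dependent wiring is decided in the model's forward:

    import numpy as np

    batch, seq_len, n_mlp_arc = 2, 10, 500  # illustrative sizes

    # Two views of the sentence produced by the arc MLPs (names are generic on
    # purpose: which one plays "head" and which "dependent" is set in forward).
    x = np.random.rand(batch, seq_len, n_mlp_arc).astype("float32")
    y = np.random.rand(batch, seq_len, n_mlp_arc).astype("float32")

    # bias_x=True, bias_y=False: append a constant-1 column to x only, which
    # folds a per-pair linear term into the bilinear product.
    x_b = np.concatenate([x, np.ones((batch, seq_len, 1), "float32")], axis=-1)

    # Single-output biaffine: one (n_mlp_arc + 1, n_mlp_arc) weight matrix.
    W = np.random.rand(n_mlp_arc + 1, n_mlp_arc).astype("float32")

    # s_arc[b, i, j]: compatibility score between position i (x side) and
    # position j (y side) in sentence b.
    s_arc = np.einsum("bxi,ij,byj->bxy", x_b, W, y)
    assert s_arc.shape == (batch, seq_len, seq_len)

    # The relation scorer (n_out=n_rels, bias on both sides) generalizes this to
    # an (n_rels, n_mlp_rel + 1, n_mlp_rel + 1) weight, giving scores of shape
    # (batch, n_rels, seq_len, seq_len).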