def layer_forward(self, x, hx, cell, batch_sizes, reverse=False):
    """Forward pass of a single direction of one BiLSTM layer over a packed sequence."""
    hx_0 = hx_i = hx
    hx_n, output = [], []
    steps = reversed(range(len(x))) if reverse else range(len(x))
    if self.training and self.dropout > 0:
        # one dropout mask is sampled per sequence and shared across all timesteps
        hid_mask = SharedDropout.get_mask(hx_0[0], self.dropout)

    for t in steps:
        last_bs, bs = len(hx_i[0]), batch_sizes[t]
        if last_bs < bs:
            # more sequences are alive at this step: extend the state with the initial hidden
            hx_i = [
                layers.concat((h, ih[last_bs:bs]))
                for h, ih in zip(hx_i, hx_0)
            ]
        elif bs < hx_i[0].shape[0]:
            # some sequences have ended: stash their final states and shrink the batch
            hx_n.append([hx_i[0][bs:], hx_i[1][bs:]])
            hx_i = [h[:bs] for h in hx_i]
        hx_i = [h for h in cell(x[t], *hx_i)]
        output.append(hx_i[0])
        if self.training and self.dropout > 0:
            hx_i[0] = hx_i[0] * hid_mask[:bs]

    if reverse:
        hx_n = hx_i
        output.reverse()
    else:
        hx_n.append(hx_i)
        hx_n = [layers.concat(h) for h in zip(*reversed(hx_n))]
    output = layers.concat(output)
    return output, hx_n
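# The hid_mask above is the key trick of SharedDropout: one mask is sampled per
# sequence and reused at every timestep, sliced to the current batch size. The
# sketch below is a hypothetical, numpy-only illustration of that behavior
# (demo_shared_mask is not part of the repo), assuming inverted-dropout scaling
# and batch_sizes sorted in descending order.
import numpy as np

def demo_shared_mask(hidden_states, p, batch_sizes):
    """hidden_states: list of arrays, hidden_states[t].shape == (batch_sizes[t], hidden_size)."""
    # sample the mask once, from the largest (first) timestep batch
    mask = (np.random.rand(*hidden_states[0].shape) >= p) / (1 - p)
    # every timestep reuses the same rows of the same mask
    return [h * mask[:bs] for h, bs in zip(hidden_states, batch_sizes)]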
def __init__(self, args):
    super(LSTMEmbed, self).__init__(args)
    # Initialize the feat embedding; feat can be char or pos
    if args.feat == "char":
        self.feat_embed = CharLSTM(
            n_chars=args.n_feats,
            n_embed=args.n_char_embed,
            n_out=args.n_lstm_feat_embed,
            pad_index=args.feat_pad_index,
        )
        feat_embed_size = args.n_lstm_feat_embed
    else:
        self.feat_embed = dygraph.Embedding(size=(args.n_feats, args.n_feat_embed))
        feat_embed_size = args.n_feat_embed
    # lstm layer
    self.lstm = BiLSTM(
        input_size=args.n_embed + feat_embed_size,
        hidden_size=args.n_lstm_hidden,
        num_layers=args.n_lstm_layers,
        dropout=args.lstm_dropout,
    )
    self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
    self.mlp_input_size = args.n_lstm_hidden * 2
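# The size arithmetic above suggests the word and feat embeddings are concatenated
# per token before the BiLSTM, and the BiLSTM concatenates its forward and backward
# hidden states. demo_embed_shapes below is a hypothetical numpy shape check
# (illustrative only, with made-up sizes; not repo code).
import numpy as np

def demo_embed_shapes(batch=2, seq_len=5, n_embed=300, feat_embed_size=100):
    word_embed = np.zeros((batch, seq_len, n_embed))
    feat_embed = np.zeros((batch, seq_len, feat_embed_size))
    lstm_input = np.concatenate([word_embed, feat_embed], axis=-1)
    # lstm_input.shape[-1] == n_embed + feat_embed_size, matching BiLSTM input_size;
    # the BiLSTM output width is n_lstm_hidden * 2 (forward + backward), hence mlp_input_size
    return lstm_input.shape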
def __init__(self, n_in, n_out, dropout=0):
    super(MLP, self).__init__()
    self.n_in = n_in
    self.n_out = n_out
    self.linear = dygraph.Linear(
        n_in,
        n_out,
        param_attr=initializer.Xavier(uniform=False),
        bias_attr=None,
    )
    self.dropout = SharedDropout(p=dropout)
def __init__(self, args):
    super(LSTMByWPEmbed, self).__init__(args)
    self.args = args
    self.init_ernie_model(args)
    # lstm layer
    self.lstm = BiLSTM(
        input_size=args.lstm_by_wp_embed_size,
        hidden_size=args.n_lstm_hidden,
        num_layers=args.n_lstm_layers,
        dropout=args.lstm_dropout,
    )
    self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
    self.mlp_input_size = args.n_lstm_hidden * 2
def forward(self, x, seq_mask, pad_index, hx=None):
    """Forward network of the multi-layer BiLSTM over a packed, length-sorted batch."""
    x, batch_sizes, sorted_indices = self.pack_padded_sequence(x, seq_mask, pad_index)
    _, unsorted_indices = layers.argsort(sorted_indices)
    batch_size = batch_sizes[0]
    h_n, c_n = [], []

    if hx is None:
        ih = layers.zeros(
            shape=(self.num_layers * 2, batch_size, self.hidden_size),
            dtype=x[0].dtype)
        h, c = ih, ih
    else:
        h, c = self.permute_hidden(hx, sorted_indices)
    h = layers.reshape(h, shape=(self.num_layers, 2, -1, self.hidden_size))
    c = layers.reshape(c, shape=(self.num_layers, 2, -1, self.hidden_size))

    for i in range(self.num_layers):
        x = layers.split(x, batch_sizes, dim=0)
        if self.training and self.dropout > 0:
            # one dropout mask shared by every timestep of this layer's input
            mask = SharedDropout.get_mask(x[0], self.dropout)
            x = [j * mask[:len(j)] for j in x]
        x_f, (h_f, c_f) = self.layer_forward(x=x,
                                             hx=(h[i, 0], c[i, 0]),
                                             cell=self.f_cells[i],
                                             batch_sizes=batch_sizes)
        x_b, (h_b, c_b) = self.layer_forward(x=x,
                                             hx=(h[i, 1], c[i, 1]),
                                             cell=self.b_cells[i],
                                             batch_sizes=batch_sizes,
                                             reverse=True)
        # concatenate the two directions as the next layer's input
        x = layers.concat((x_f, x_b), axis=-1)
        h_n.append(layers.stack((h_f, h_b)))
        c_n.append(layers.stack((c_f, c_b)))

    x = self.pad_packed_sequence(x, batch_sizes, unsorted_indices)
    hx = layers.concat(h_n, axis=0), layers.concat(c_n, axis=0)
    hx = self.permute_hidden(hx, unsorted_indices)
    return x, hx
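# forward() consumes the packed, length-sorted representation produced by
# pack_padded_sequence: timestep t keeps only the sequences that are still
# alive, and batch_sizes[t] records how many. demo_pack below is a hypothetical
# pure-Python illustration of that layout (the real method works on tensors
# with seq_mask and pad_index), not repo code.
def demo_pack(sequences):
    """sequences: list of token lists, e.g. [[1, 2, 3], [4, 5], [6]]."""
    sorted_indices = sorted(range(len(sequences)), key=lambda i: len(sequences[i]), reverse=True)
    sorted_seqs = [sequences[i] for i in sorted_indices]
    max_len = len(sorted_seqs[0])
    batch_sizes = [sum(len(s) > t for s in sorted_seqs) for t in range(max_len)]
    # timestep-major flat data: step 0 of every sequence, then step 1 of the live ones, ...
    packed = [s[t] for t in range(max_len) for s in sorted_seqs if len(s) > t]
    return packed, batch_sizes, sorted_indices

# demo_pack([[1, 2, 3], [4, 5], [6]]) == ([1, 4, 6, 2, 5, 3], [3, 2, 1], [0, 1, 2])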
def __init__(self, args, pretrained_embed=None):
    super(Model, self).__init__()
    self.args = args
    # the embedding layer
    self.word_embed = dygraph.Embedding(size=(args.n_words, args.n_embed))
    if args.pretrained_embed_shape is not None:
        if pretrained_embed is not None:
            pre_param_attrs = fluid.ParamAttr(
                name="pretrained_emb",
                initializer=initializer.NumpyArrayInitializer(pretrained_embed),
                trainable=True)
            self.pretrained = dygraph.Embedding(
                size=args.pretrained_embed_shape,
                param_attr=pre_param_attrs)
            # when pretrained embeddings are provided, re-create the trainable
            # word embedding weights with zero initialization
            self.word_embed.weight = layers.create_parameter(
                shape=(self.args.n_words, self.args.n_embed),
                dtype='float32',
                default_initializer=initializer.Constant(value=0.0))
        else:
            self.pretrained = dygraph.Embedding(size=args.pretrained_embed_shape)
    # Initialize the feat embedding; feat can be char or pos
    if args.feat == 'char':
        self.feat_embed = CharLSTM(n_chars=args.n_feats,
                                   n_embed=args.n_char_embed,
                                   n_out=args.n_feat_embed,
                                   pad_index=args.feat_pad_index)
    else:
        self.feat_embed = dygraph.Embedding(size=(args.n_feats, args.n_feat_embed))
    self.embed_dropout = IndependentDropout(p=args.embed_dropout)
    # lstm layer
    self.lstm = BiLSTM(input_size=args.n_embed + args.n_feat_embed,
                       hidden_size=args.n_lstm_hidden,
                       num_layers=args.n_lstm_layers,
                       dropout=args.lstm_dropout)
    self.lstm_dropout = SharedDropout(p=args.lstm_dropout)
    # mlp layers
    self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                         n_out=args.n_mlp_arc,
                         dropout=args.mlp_dropout)
    self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                         n_out=args.n_mlp_arc,
                         dropout=args.mlp_dropout)
    self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                         n_out=args.n_mlp_rel,
                         dropout=args.mlp_dropout)
    self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                         n_out=args.n_mlp_rel,
                         dropout=args.mlp_dropout)
    # biaffine layers
    self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                             bias_x=True,
                             bias_y=False)
    self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                             n_out=args.n_rels,
                             bias_x=True,
                             bias_y=True)
    self.pad_index = args.pad_index
    self.unk_index = args.unk_index
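# A hypothetical numpy sketch of the biaffine arc scoring that arc_attn performs
# on the outputs of mlp_arc_d / mlp_arc_h (demo_biaffine_arc_scores and its
# argument names are illustrative, not the repo's Biaffine layer). Assuming the
# standard biaffine formulation: with bias_x=True and bias_y=False, a column of
# ones is appended to the dependent side, so the weight has shape
# (n_mlp_arc + 1, n_mlp_arc), and scores[b, i, j] rates token j as the head of token i.
import numpy as np

def demo_biaffine_arc_scores(arc_d, arc_h, weight, bias_x=True, bias_y=False):
    """arc_d, arc_h: (batch, seq_len, n_mlp_arc) representations of dependents and heads."""
    if bias_x:
        arc_d = np.concatenate([arc_d, np.ones(arc_d.shape[:-1] + (1,))], axis=-1)
    if bias_y:
        arc_h = np.concatenate([arc_h, np.ones(arc_h.shape[:-1] + (1,))], axis=-1)
    # scores[b, i, j] = arc_d[b, i] @ weight @ arc_h[b, j]
    return np.einsum('bxi,ij,byj->bxy', arc_d, weight, arc_h)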