def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    with layer.mixed(size=cur_token.size,
                     bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                     act=Act.Linear()) as proj:
        proj += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        proj += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))
    expanded = layer.expand(input=proj, expand_as=to_apply)
    att_context = layer.addto(input=[expanded, to_apply_proj],
                              act=Act.Tanh(),
                              bias_attr=False)
    att_weights = layer.fc(input=att_context,
                           param_attr=Attr.Param(direct + '.w'),
                           bias_attr=Attr.Param(direct + '.b', initial_std=0.),
                           act=Act.SequenceSoftmax(),
                           size=1)
    scaled = layer.scaling(input=to_apply, weight=att_weights)
    applied = layer.pooling(input=scaled, pooling_type=paddle.pooling.Sum())
    return applied

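# For reference, a minimal NumPy sketch of the additive attention this layer
# graph builds (a sketch only; `w_p`, `w_r`, `b_p`, `w`, `b` are hypothetical
# dense stand-ins for the '.wp', '.wr', '.bp', '.w', '.b' parameters above,
# and `to_apply` is assumed to be a (q_len, dim) question encoding):
import numpy as np

def _attention_sketch(cur_token, prev, to_apply, to_apply_proj,
                      w_p, w_r, b_p, w, b):
    # proj = W_p * cur_token + W_r * prev + b_p                   -> (hidden,)
    proj = cur_token.dot(w_p) + prev.dot(w_r) + b_p
    # broadcast proj over the question sequence, add the precomputed
    # question projection, and squash                             -> (q_len, hidden)
    att_context = np.tanh(proj[None, :] + to_apply_proj)
    # one score per question position, softmax over the sequence  -> (q_len, 1)
    scores = att_context.dot(w) + b
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()
    # weighted sum of the question encodings                      -> (dim,)
    return (to_apply * weights).sum(axis=0)
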
def network(self):
    """
    Implements the whole network of Match-LSTM.

    Returns:
        A tuple of LayerOutput objects containing the start and end
        probability distributions respectively.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    p_encs = []
    p_matches = []
    for p in self.p_ids:
        p_encs.append(self.get_enc(p, type='p'))

    q_proj_left = layer.fc(size=self.emb_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(self.name + '_left_' + '.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=self.emb_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(self.name + '_right_' + '.wq'),
                            input=q_enc)
    for i, p in enumerate(p_encs):
        left_out = self.recurrent_group(
            self.name + '_left_' + str(i),
            [layer.StaticInput(q_enc), layer.StaticInput(q_proj_left), p],
            reverse=False)
        right_out = self.recurrent_group(
            self.name + '_right_' + str(i),
            [layer.StaticInput(q_enc), layer.StaticInput(q_proj_right), p],
            reverse=True)
        match_seq = layer.concat(input=[left_out, right_out])
        match_seq_dropped = self.drop_out(match_seq, drop_rate=0.5)
        bi_match_seq = paddle.networks.bidirectional_lstm(
            input=match_seq_dropped,
            size=match_seq.size,
            fwd_mat_param_attr=Attr.Param('pn_f_enc_mat.w'),
            fwd_bias_param_attr=Attr.Param('pn_f_enc.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('pn_f_enc_inn.w'),
            bwd_mat_param_attr=Attr.Param('pn_b_enc_mat.w'),
            bwd_bias_param_attr=Attr.Param('pn_b_enc.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('pn_b_enc_inn.w'),
            return_seq=True)
        p_matches.append(bi_match_seq)

    all_docs = reduce(lambda x, y: layer.seq_concat(a=x, b=y), p_matches)
    all_docs_dropped = self.drop_out(all_docs, drop_rate=0.5)
    start = self.decode('start', all_docs_dropped)
    end = self.decode('end', all_docs_dropped)
    return start, end

def _step_basic(self, h_cur, u):
    expanded_h = layer.expand(input=h_cur, expand_as=u)
    hu = layer.concat(input=[expanded_h, u])
    with layer.mixed(bias_attr=False) as dot_hu:
        dot_hu += layer.dotmul_operator(a=expanded_h, b=u)
    cat_all = layer.concat(input=[hu, dot_hu])
    s = layer.fc(size=1,
                 bias_attr=False,
                 param_attr=Attr.Param(self.name + '.ws'),
                 input=cat_all)
    return s

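# The scoring above is the trilinear similarity s_j = w_s^T [h; u_j; h * u_j].
# A NumPy sketch (a hypothetical dense `w_s` stands in for the '.ws' parameter;
# `h_cur` is a (dim,) vector and `u` a (q_len, dim) sequence):
import numpy as np

def _step_basic_sketch(h_cur, u, w_s):
    expanded_h = np.tile(h_cur, (u.shape[0], 1))        # expand h over u
    cat_all = np.concatenate([expanded_h, u, expanded_h * u], axis=1)
    return cat_all.dot(w_s)                             # (q_len,) scores
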
def _step(self, name, h_q_all, q_proj, h_p_cur):
    """
    Match-LSTM step. This function performs the operations done in one
    time step.

    Args:
        h_p_cur: Current hidden of paragraph encodings: h_i.
                 This is the `REAL` input of the group, like x_t
                 in a normal RNN.
        h_q_all: Question encodings.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    direct = 'left' if 'left' in name else 'right'
    h_r_prev = paddle.layer.memory(name=name + '_out_',
                                   size=h_q_all.size,
                                   boot_layer=None)
    q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
    z_cur = self.fusion_layer(h_p_cur, q_expr)

    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as match_input:
        match_input += layer.full_matrix_projection(
            input=z_cur,
            param_attr=Attr.Param('match_input_%s.w0' % direct))

    step_out = paddle.networks.lstmemory_unit(
        name=name + '_out_',
        out_memory=h_r_prev,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                        initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=match_input,
        size=h_q_all.size)
    return step_out

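# In the paper's notation this step computes, roughly,
#   $z_i = \mathrm{fusion}(h^{p}_{i}, \mathrm{att}(h^{p}_{i}, h^{r}_{i-1}, H^{q}))$
#   $h^{r}_{i} = \mathrm{LSTM}(W_{0} z_i, h^{r}_{i-1})$
# where the attention term is the _attention graph above and the exact gate
# equations live inside paddle.networks.lstmemory_unit (a paraphrase of the
# data flow in this function, not a verbatim formula from the paper).
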
def network(self):
    """
    Implements the detail of the model.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    a_enc = self.get_enc(self.a_ids, type='q')

    q_proj_left = layer.fc(size=self.emb_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(self.name + '_left.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=self.emb_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(self.name + '_right.wq'),
                            input=q_enc)
    left_match = self.recurrent_group(
        self.name + '_left',
        [layer.StaticInput(q_enc), layer.StaticInput(q_proj_left), a_enc],
        reverse=False)
    right_match = self.recurrent_group(
        self.name + '_right',
        [layer.StaticInput(q_enc), layer.StaticInput(q_proj_right), a_enc],
        reverse=True)
    match_seq = layer.concat(input=[left_match, right_match])

    with layer.mixed(size=match_seq.size,
                     act=Act.Identity(),
                     layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                     bias_attr=False) as dropped:
        dropped += layer.identity_projection(match_seq)
    match_result = layer.pooling(input=dropped,
                                 pooling_type=paddle.pooling.Max())
    cls = layer.fc(input=match_result,
                   act=Act.Softmax(),
                   size=self.label_dim)
    return cls

def network(self, question, evidence, qe_comm, ee_comm, conf):
    """
    Implements the matching part of Match-LSTM.

    Returns:
        A LayerOutput object holding the concatenated (and dropped-out)
        left and right match sequences.
    """
    q_enc = self.get_enc(question, conf, type='q')
    p_enc = self.get_enc(evidence, conf, type='q')
    q_proj_left = layer.fc(size=conf.word_vec_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(self.name + '_left_' + '.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=conf.word_vec_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(self.name + '_right_' + '.wq'),
                            input=q_enc)
    # StaticInput defines a read-only memory: an input wrapped in StaticInput
    # is not split up by recurrent_group, so every unrolled time step can see
    # the whole input. It may be a non-sequence or a single-level sequence.
    left_out = self.recurrent_group(
        self.name + '_left',
        [layer.StaticInput(q_enc), layer.StaticInput(q_proj_left),
         p_enc, qe_comm, ee_comm],
        reverse=False)
    right_out = self.recurrent_group(
        self.name + '_right_',
        [layer.StaticInput(q_enc), layer.StaticInput(q_proj_right),
         p_enc, qe_comm, ee_comm],
        reverse=True)
    match_seq = layer.concat(input=[left_out, right_out])
    return self.drop_out(match_seq, drop_rate=0.5)

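# A toy sketch of the StaticInput semantics noted above: static inputs are
# handed whole to every time step, while the real sequence input is split
# into one element per step (the `step_fn` here is a hypothetical stand-in
# for the group's step function, not the framework API):
def _recurrent_group_sketch(step_fn, static_inputs, seq_input, reverse=False):
    tokens = seq_input[::-1] if reverse else seq_input
    outputs = []
    for tok in tokens:
        # every step sees all static inputs, unsplit, plus one token
        outputs.append(step_fn(*(list(static_inputs) + [tok])))
    return outputs[::-1] if reverse else outputs
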
def _get_enc(self, input, type='q'):
    embs = self.get_embs(input)
    enc = networks.bidirectional_lstm(
        input=embs,
        size=self.emb_dim,
        fwd_mat_param_attr=Attr.Param(self.name + '_f_enc_mat.w' + type),
        fwd_bias_param_attr=Attr.Param(self.name + '_f_enc.bias' + type,
                                       initial_std=0.),
        fwd_inner_param_attr=Attr.Param(self.name + '_f_enc_inn.w' + type),
        bwd_mat_param_attr=Attr.Param(self.name + '_b_enc_mat.w' + type),
        bwd_bias_param_attr=Attr.Param(self.name + '_b_enc.bias' + type,
                                       initial_std=0.),
        bwd_inner_param_attr=Attr.Param(self.name + '_b_enc_inn.w' + type),
        return_seq=True)
    enc_dropped = self.drop_out(enc, drop_rate=0.25)
    return enc_dropped

def get_enc(self, input, type='q'):
    """
    Encodes the input by feeding it into a bidirectional lstm and
    concatenating the forward and backward states at each time step.
    """
    embs = self.get_embs(input)
    enc = paddle.networks.bidirectional_lstm(
        input=embs,
        size=self.emb_dim,
        fwd_mat_param_attr=Attr.Param('f_enc_mat.w' + type),
        fwd_bias_param_attr=Attr.Param('f_enc.bias' + type, initial_std=0.),
        fwd_inner_param_attr=Attr.Param('f_enc_inn.w' + type),
        bwd_mat_param_attr=Attr.Param('b_enc_mat.w' + type),
        bwd_bias_param_attr=Attr.Param('b_enc.bias' + type, initial_std=0.),
        bwd_inner_param_attr=Attr.Param('b_enc_inn.w' + type),
        return_seq=True)
    enc_dropped = self.drop_out(enc, drop_rate=0.5)
    return enc_dropped

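# As a shape check, the encoder behaves like the toy sketch below: a forward
# and a backward pass whose states are concatenated per position, which is
# why downstream projections use size `emb_dim * 2` (the `step_fn` argument
# is a hypothetical stand-in for the LSTM cell):
import numpy as np

def _bidirectional_encode_sketch(embs, step_fn, hidden_dim):
    # embs: (seq_len, emb_dim); step_fn(x, h) -> next hidden (hidden_dim,)
    fwd, bwd = [], []
    h = np.zeros(hidden_dim)
    for x in embs:                       # left-to-right pass
        h = step_fn(x, h)
        fwd.append(h)
    h = np.zeros(hidden_dim)
    for x in embs[::-1]:                 # right-to-left pass
        h = step_fn(x, h)
        bwd.append(h)
    bwd = bwd[::-1]
    # (seq_len, 2 * hidden_dim), matching return_seq=True above
    return np.concatenate([np.stack(fwd), np.stack(bwd)], axis=1)
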
def network(self):
    """
    Implements the whole network.

    Returns:
        A tuple of LayerOutput objects containing the start and end
        probability distributions respectively.
    """
    self.check_and_create_data()
    self.create_shared_params()
    u = self._get_enc(self.q_ids, type='q')
    m1s = []
    m2s = []
    for p in self.p_ids:
        h = self._get_enc(p, type='q')
        g = self._attention_flow(h, u)
        m1 = networks.bidirectional_lstm(
            fwd_mat_param_attr=Attr.Param('_f_m1_mat.w'),
            fwd_bias_param_attr=Attr.Param('_f_m1.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('_f_m1_inn.w'),
            bwd_mat_param_attr=Attr.Param('_b_m1_mat.w'),
            bwd_bias_param_attr=Attr.Param('_b_m1.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('_b_m1_inn.w'),
            input=g,
            size=self.emb_dim,
            return_seq=True)
        m1_dropped = self.drop_out(m1, drop_rate=0.)
        cat_g_m1 = layer.concat(input=[g, m1_dropped])

        m2 = networks.bidirectional_lstm(
            fwd_mat_param_attr=Attr.Param('_f_m2_mat.w'),
            fwd_bias_param_attr=Attr.Param('_f_m2.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('_f_m2_inn.w'),
            bwd_mat_param_attr=Attr.Param('_b_m2_mat.w'),
            bwd_bias_param_attr=Attr.Param('_b_m2.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('_b_m2_inn.w'),
            input=m1,
            size=self.emb_dim,
            return_seq=True)
        m2_dropped = self.drop_out(m2, drop_rate=0.)
        cat_g_m2 = layer.concat(input=[g, m2_dropped])

        m1s.append(cat_g_m1)
        m2s.append(cat_g_m2)

    all_m1 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m1s)
    all_m2 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m2s)
    start = self.decode('start', all_m1)
    end = self.decode('end', all_m2)
    return start, end

import unittest

import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.networks as networks
import paddle.v2.pooling as pooling

pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10))
weight = layer.data(name='weight', type=data_type.dense_vector(1))
combine_weight = layer.data(name='weight_combine',
                            type=data_type.dense_vector(10))
score = layer.data(name='score', type=data_type.dense_vector(1))

hidden = layer.fc(input=pixel,
                  size=100,
                  act=activation.Sigmoid(),
                  param_attr=attr.Param(name='hidden'))
inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
conv = layer.img_conv(input=pixel,
                      filter_size=1,
                      filter_size_y=1,
                      num_channels=8,
                      num_filters=16,
                      act=activation.Linear())


class ImageLayerTest(unittest.TestCase):
    def test_conv_layer(self):
        conv_shift = layer.conv_shift(a=pixel, b=score)
        print layer.parse_network(conv, conv_shift)

    def test_pooling_layer(self):

def _step(self, name, h_q_all, q_proj, h_p_cur, qe_comm, ee_comm):
    """
    Match-LSTM step. This function performs the operations done in one
    time step.

    Args:
        h_p_cur: Current hidden of paragraph encodings: h_i.
                 This is the `REAL` input of the group, like x_t
                 in a normal RNN.
        h_q_all: Question encodings.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    conf = mLSTM_crf_config.TrainingConfig()
    direct = 'left' if 'left' in name else 'right'
    # Fetch the output of the previous time step.
    h_r_prev = paddle.layer.memory(name=name + '_out_',
                                   size=h_q_all.size,
                                   boot_layer=None)
    # h_p_cur :: current hidden of the paragraph encodings
    # h_q_all :: question encodings
    # q_proj  :: q_proj_(left or right)
    q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
    z_cur = self.fusion_layer(h_p_cur, q_expr)

    # feature embeddings
    comm_initial_std = 1 / math.sqrt(64.0)
    qe_comm_emb = paddle.layer.embedding(
        input=qe_comm,
        size=conf.com_vec_dim,
        param_attr=paddle.attr.ParamAttr(name="_cw_embedding.w0",
                                         initial_std=comm_initial_std,
                                         l2_rate=conf.default_l2_rate))
    ee_comm_emb = paddle.layer.embedding(
        input=ee_comm,
        size=conf.com_vec_dim,
        param_attr=paddle.attr.ParamAttr(name="_eecom_embedding.w0",
                                         initial_std=comm_initial_std,
                                         l2_rate=conf.default_l2_rate))

    # layer.mixed: project the combined inputs to the required size,
    # preparing the input of the LSTM.
    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as match_input:
        match_input += layer.full_matrix_projection(
            input=z_cur,
            param_attr=Attr.Param('match_input_z_%s.w0' % direct))
        match_input += layer.full_matrix_projection(
            input=qe_comm_emb,
            param_attr=Attr.Param('match_input_qe_%s.w0' % direct))
        match_input += layer.full_matrix_projection(
            input=ee_comm_emb,
            param_attr=Attr.Param('match_input_ee_%s.w0' % direct))

    step_out = paddle.networks.lstmemory_unit(
        name=name + '_out_',
        out_memory=h_r_prev,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                        initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=match_input,
        size=h_q_all.size)
    return step_out