def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    """Additive attention over `to_apply`, conditioned on the current
    paragraph token and the previous recurrent state. Returns the
    attention-weighted sum of `to_apply`."""
    # Project the current token and the previous state into a shared space.
    with layer.mixed(size=cur_token.size,
                     bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                     act=Act.Linear()) as proj:
        proj += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        proj += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))
    # Broadcast the per-step projection over the attended sequence and
    # combine it with the pre-computed projection of `to_apply`.
    expanded = layer.expand(input=proj, expand_as=to_apply)
    att_context = layer.addto(input=[expanded, to_apply_proj],
                              act=Act.Tanh(),
                              bias_attr=False)
    # One score per step, normalized over the whole sequence.
    att_weights = layer.fc(input=att_context,
                           param_attr=Attr.Param(direct + '.w'),
                           bias_attr=Attr.Param(direct + '.b', initial_std=0.),
                           act=Act.SequenceSoftmax(),
                           size=1)
    # Weighted sum of the attended sequence.
    scaled = layer.scaling(input=to_apply, weight=att_weights)
    applied = layer.pooling(input=scaled,
                            pooling_type=paddle.pooling.Sum())
    return applied
def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    with layer.mixed(size=cur_token.size,
                     bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                     act=Act.Linear()) as proj:
        proj += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        proj += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))
    expanded = layer.expand(input=proj, expand_as=to_apply)
    att_context = layer.addto(input=[expanded, to_apply_proj],
                              act=Act.Tanh(),
                              bias_attr=False)
    att_weights = layer.fc(input=att_context,
                           param_attr=Attr.Param(direct + '.w'),
                           bias_attr=Attr.Param(direct + '.b', initial_std=0.),
                           act=Act.SequenceSoftmax(),
                           size=1)
    scaled = layer.scaling(input=to_apply, weight=att_weights)
    applied = layer.pooling(input=scaled,
                            pooling_type=paddle.pooling.Sum())
    return applied
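# A minimal sketch (an assumption, not part of the original source) of how the
# `to_apply_proj` argument above is typically prepared: the attended sequence
# is projected once, outside the recurrent step, so each step only adds its
# per-token term before the tanh. The name `h_q_all` and the parameter suffix
# '.wq' are illustrative placeholders; the projection is sized to match the
# per-step projection in `_attention` (cur_token.size == h_q_all.size here).
def _build_to_apply_proj(self, direct, h_q_all):
    with layer.mixed(size=h_q_all.size,
                     act=Act.Linear(),
                     bias_attr=False) as q_proj:
        q_proj += layer.full_matrix_projection(
            input=h_q_all,
            param_attr=Attr.Param(direct + '.wq'))
    return q_proj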
def test_projection(self):
    input = layer.data(name='data2', type=data_type.dense_vector(784))
    word = layer.data(
        name='word2', type=data_type.integer_value_sequence(10000))
    fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
    fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
    mixed0 = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=fc0),
            layer.full_matrix_projection(input=fc1)
        ])
    with layer.mixed(size=200) as mixed1:
        mixed1 += layer.full_matrix_projection(input=fc0)
        mixed1 += layer.identity_projection(input=fc1)

    table = layer.table_projection(input=word)
    emb0 = layer.mixed(size=512, input=table)
    with layer.mixed(size=512) as emb1:
        emb1 += table

    scale = layer.scaling_projection(input=fc0)
    scale0 = layer.mixed(size=100, input=scale)
    with layer.mixed(size=100) as scale1:
        scale1 += scale

    dotmul = layer.dotmul_projection(input=fc0)
    dotmul0 = layer.mixed(size=100, input=dotmul)
    with layer.mixed(size=100) as dotmul1:
        dotmul1 += dotmul

    context = layer.context_projection(input=fc0, context_len=5)
    context0 = layer.mixed(size=500, input=context)
    with layer.mixed(size=500) as context1:
        context1 += context

    conv = layer.conv_projection(
        input=input,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv0 = layer.mixed(input=conv, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv1:
        conv1 += conv

    print layer.parse_network(mixed0)
    print layer.parse_network(mixed1)
    print layer.parse_network(emb0)
    print layer.parse_network(emb1)
    print layer.parse_network(scale0)
    print layer.parse_network(scale1)
    print layer.parse_network(dotmul0)
    print layer.parse_network(dotmul1)
    print layer.parse_network(conv0)
    print layer.parse_network(conv1)
def test_projection(self):
    input = layer.data(name='data', type=data_type.dense_vector(784))
    word = layer.data(
        name='word', type=data_type.integer_value_sequence(10000))
    fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
    fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
    mixed0 = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=fc0),
            layer.full_matrix_projection(input=fc1)
        ])
    with layer.mixed(size=200) as mixed1:
        mixed1 += layer.full_matrix_projection(input=fc0)
        mixed1 += layer.identity_projection(input=fc1)

    table = layer.table_projection(input=word)
    emb0 = layer.mixed(size=512, input=table)
    with layer.mixed(size=512) as emb1:
        emb1 += table

    scale = layer.scaling_projection(input=fc0)
    scale0 = layer.mixed(size=100, input=scale)
    with layer.mixed(size=100) as scale1:
        scale1 += scale

    dotmul = layer.dotmul_projection(input=fc0)
    dotmul0 = layer.mixed(size=100, input=dotmul)
    with layer.mixed(size=100) as dotmul1:
        dotmul1 += dotmul

    context = layer.context_projection(input=fc0, context_len=5)
    # context_projection concatenates context_len frames, so the output
    # width is 100 * 5 = 500 and the mixed layers must be sized to match.
    context0 = layer.mixed(size=500, input=context)
    with layer.mixed(size=500) as context1:
        context1 += context

    conv = layer.conv_projection(
        input=input,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv0 = layer.mixed(input=conv, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv1:
        conv1 += conv

    print layer.parse_network(mixed0)
    print layer.parse_network(mixed1)
    print layer.parse_network(emb0)
    print layer.parse_network(emb1)
    print layer.parse_network(scale0)
    print layer.parse_network(scale1)
    print layer.parse_network(dotmul0)
    print layer.parse_network(dotmul1)
    print layer.parse_network(conv0)
    print layer.parse_network(conv1)
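# A small illustrative helper (an assumption, not part of the original tests):
# the width of a `layer.mixed` must equal the output width of the projections
# fed into it. For `context_projection`, that width is input_size *
# context_len, which is why the mixed layers wrapping the 100-dim `fc0` with
# context_len=5 above are sized 500.
def _context_projection_size(input_size, context_len):
    # Pure arithmetic: context_projection concatenates `context_len`
    # neighbouring frames of the input.
    return input_size * context_len

assert _context_projection_size(100, 5) == 500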
def _step(self, name, h_q_all, q_proj, h_p_cur):
    """
    Match-LSTM step. This function performs the operations done in one
    time step.

    Args:
        h_p_cur: Current hidden of paragraph encodings: h_i. This is
                 the `REAL` input of the group, like x_t in a normal RNN.
        h_q_all: Question encodings.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    direct = 'left' if 'left' in name else 'right'
    h_r_prev = paddle.layer.memory(
        name=name + '_out_', size=h_q_all.size, boot_layer=None)
    q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
    z_cur = self.fusion_layer(h_p_cur, q_expr)

    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as match_input:
        match_input += layer.full_matrix_projection(
            input=z_cur,
            param_attr=Attr.Param('match_input_%s.w0' % direct))

    step_out = paddle.networks.lstmemory_unit(
        name=name + '_out_',
        out_memory=h_r_prev,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param(
            'step_lstm_mixed_%s.bias' % direct, initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=match_input,
        size=h_q_all.size)
    return step_out
def _step(self, name, h_q_all, q_proj, h_p_cur):
    """
    Match-LSTM step. This function performs the operations done in one
    time step.

    Args:
        h_p_cur: Current hidden of paragraph encodings: h_i. This is
                 the `REAL` input of the group, like x_t in a normal RNN.
        h_q_all: Question encodings.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    direct = 'left' if 'left' in name else 'right'
    h_r_prev = paddle.layer.memory(
        name=name + '_out_', size=h_q_all.size, boot_layer=None)
    q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
    z_cur = self.fusion_layer(h_p_cur, q_expr)

    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as match_input:
        match_input += layer.full_matrix_projection(
            input=z_cur,
            param_attr=Attr.Param('match_input_%s.w0' % direct))

    step_out = paddle.networks.lstmemory_unit(
        name=name + '_out_',
        out_memory=h_r_prev,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param(
            'step_lstm_mixed_%s.bias' % direct, initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=match_input,
        size=h_q_all.size)
    return step_out
def _step(self, name, h_q_all, q_proj, h_p_cur, qe_comm, ee_comm):
    """
    Match-LSTM step. This function performs the operations done in one
    time step.

    Args:
        h_p_cur: Current hidden of paragraph encodings: h_i. This is
                 the `REAL` input of the group, like x_t in a normal RNN.
        h_q_all: Question encodings.
        qe_comm: Question-evidence common-word feature ids.
        ee_comm: Evidence-evidence common-word feature ids.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    conf = mLSTM_crf_config.TrainingConfig()
    direct = 'left' if 'left' in name else 'right'

    # Fetch the output of the previous time step.
    h_r_prev = paddle.layer.memory(
        name=name + '_out_', size=h_q_all.size, boot_layer=None)

    # h_p_cur: current hidden of paragraph encodings
    # h_q_all: question word embeddings
    # q_proj:  q_proj_(left or right)
    q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
    z_cur = self.fusion_layer(h_p_cur, q_expr)

    # Feature embeddings.
    comm_initial_std = 1 / math.sqrt(64.0)
    qe_comm_emb = paddle.layer.embedding(
        input=qe_comm,
        size=conf.com_vec_dim,
        param_attr=paddle.attr.ParamAttr(
            name="_cw_embedding.w0",
            initial_std=comm_initial_std,
            l2_rate=conf.default_l2_rate))
    ee_comm_emb = paddle.layer.embedding(
        input=ee_comm,
        size=conf.com_vec_dim,
        param_attr=paddle.attr.ParamAttr(
            name="_eecom_embedding.w0",
            initial_std=comm_initial_std,
            l2_rate=conf.default_l2_rate))

    # layer.mixed: project the combined inputs to the required dimension,
    # preparing the input of the LSTM.
    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as match_input:
        match_input += layer.full_matrix_projection(
            input=z_cur,
            param_attr=Attr.Param('match_input_z_%s.w0' % direct))
        match_input += layer.full_matrix_projection(
            input=qe_comm_emb,
            param_attr=Attr.Param('match_input_qe_%s.w0' % direct))
        match_input += layer.full_matrix_projection(
            input=ee_comm_emb,
            param_attr=Attr.Param('match_input_ee_%s.w0' % direct))

    step_out = paddle.networks.lstmemory_unit(
        name=name + '_out_',
        out_memory=h_r_prev,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param(
            'step_lstm_mixed_%s.bias' % direct, initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=match_input,
        size=h_q_all.size)
    return step_out
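# A minimal usage sketch (an assumption, not part of the original source) of
# how a step function with the signature above is typically driven by
# paddle.layer.recurrent_group: question-side inputs that do not change across
# time steps are wrapped as StaticInput, while the paragraph encodings and the
# per-token feature ids are the real sequence inputs unrolled step by step.
# The names h_q_all, q_proj, h_p_all, qe_comm and ee_comm are illustrative
# placeholders, and `is_seq=True` may be unnecessary on newer PaddlePaddle v2
# releases where StaticInput infers sequence-ness.
def _apply_match_lstm(self, name, h_q_all, q_proj, h_p_all, qe_comm, ee_comm):
    def step(*ins):
        # Bind the group name so _step receives it as its first argument.
        return self._step(name, *ins)

    return paddle.layer.recurrent_group(
        name=name,
        step=step,
        input=[
            paddle.layer.StaticInput(input=h_q_all, is_seq=True),
            paddle.layer.StaticInput(input=q_proj, is_seq=True),
            h_p_all,
            qe_comm,
            ee_comm,
        ])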