# Imports assumed by the snippets below (PaddlePaddle v2 API, inferred from
# the layer/Attr/Act/pooling/networks names they use):
import paddle.v2 as paddle
import paddle.v2.layer as layer
import paddle.v2.attr as Attr
import paddle.v2.activation as Act
import paddle.v2.pooling as pooling
import paddle.v2.networks as networks


def _step_basic(self, h_cur, u):
    """Scores one passage encoding h_cur against the question encoding u:
    s = ws . [h; u; h * u]."""
    # Broadcast the single vector h_cur across every time step of u.
    expanded_h = layer.expand(input=h_cur, expand_as=u)
    hu = layer.concat(input=[expanded_h, u])
    # Elementwise product h * u.
    with layer.mixed(bias_attr=False) as dot_hu:
        dot_hu += layer.dotmul_operator(a=expanded_h, b=u)
    cat_all = layer.concat(input=[hu, dot_hu])
    # A single linear unit turns the concatenated features into a scalar score.
    s = layer.fc(size=1,
                 bias_attr=False,
                 param_attr=Attr.Param(self.name + '.ws'),
                 input=cat_all)
    return s
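# A minimal numpy sketch (illustrative, not part of the model) of the score
# _step_basic builds for one pair of emb_dim-sized vectors h and u, assuming
# ws holds the weights behind the '<name>.ws' parameter:
#     s = ws . [h; u; h * u]
import numpy as np

def step_basic_score(h, u, ws):
    # concat of h, u and h * u, then fc(size=1, bias_attr=False).
    feats = np.concatenate([h, u, h * u])
    return feats.dot(ws)

h, u = np.random.randn(4), np.random.randn(4)
ws = np.random.randn(3 * 4)
print(step_basic_score(h, u, ws))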
def network(self):
    """
    Implements the whole network.

    Returns:
        A tuple of LayerOutput objects containing the start and end
        probability distributions respectively.
    """
    self.check_and_create_data()
    self.create_shared_params()
    u = self._get_enc(self.q_ids, type='q')
    m1s = []
    m2s = []
    for p in self.p_ids:
        h = self._get_enc(p, type='q')
        g = self._attention_flow(h, u)
        m1 = networks.bidirectional_lstm(
            fwd_mat_param_attr=Attr.Param('_f_m1_mat.w'),
            fwd_bias_param_attr=Attr.Param('_f_m1.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('_f_m1_inn.w'),
            bwd_mat_param_attr=Attr.Param('_b_m1_mat.w'),
            bwd_bias_param_attr=Attr.Param('_b_m1.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('_b_m1_inn.w'),
            input=g,
            size=self.emb_dim,
            return_seq=True)
        m1_dropped = self.drop_out(m1, drop_rate=0.)
        cat_g_m1 = layer.concat(input=[g, m1_dropped])
        m2 = networks.bidirectional_lstm(
            fwd_mat_param_attr=Attr.Param('_f_m2_mat.w'),
            fwd_bias_param_attr=Attr.Param('_f_m2.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('_f_m2_inn.w'),
            bwd_mat_param_attr=Attr.Param('_b_m2_mat.w'),
            bwd_bias_param_attr=Attr.Param('_b_m2.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('_b_m2_inn.w'),
            input=m1,
            size=self.emb_dim,
            return_seq=True)
        m2_dropped = self.drop_out(m2, drop_rate=0.)
        cat_g_m2 = layer.concat(input=[g, m2_dropped])
        m1s.append(cat_g_m1)
        m2s.append(cat_g_m2)
    all_m1 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m1s)
    all_m2 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m2s)
    start = self.decode('start', all_m1)
    end = self.decode('end', all_m2)
    return start, end
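# Sketch of the reduce(...) idiom used above: layer.seq_concat joins two
# sequences along the time axis, so folding it over the per-paragraph
# outputs yields one sequence spanning all paragraphs. Plain Python lists
# stand in for PaddlePaddle sequences here:
m1s = [['p0_t0', 'p0_t1'], ['p1_t0'], ['p2_t0', 'p2_t1']]
all_m1 = reduce(lambda x, y: x + y, m1s)
print(all_m1)  # ['p0_t0', 'p0_t1', 'p1_t0', 'p2_t0', 'p2_t1']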
def _beta(self, h, u_expr, h_expr):
    """Concatenates h, u_expr and the elementwise products h * u_expr and
    h * h_expr into a single query-aware feature vector."""
    with layer.mixed(bias_attr=False) as dot_h_u_expr:
        dot_h_u_expr += layer.dotmul_operator(a=h, b=u_expr)
    with layer.mixed(bias_attr=False) as dot_h_h_expr:
        dot_h_h_expr += layer.dotmul_operator(a=h, b=h_expr)
    cat_all = layer.concat(input=[h, u_expr, dot_h_u_expr, dot_h_h_expr])
    return cat_all
def network(self):
    """
    Implements the whole network of Match-LSTM.

    Returns:
        A tuple of LayerOutput objects containing the start and end
        probability distributions respectively.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    p_encs = []
    p_matches = []
    for p in self.p_ids:
        p_encs.append(self.get_enc(p, type='p'))

    q_proj_left = layer.fc(size=self.emb_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(
                               self.name + '_left_' + '.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=self.emb_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(
                                self.name + '_right_' + '.wq'),
                            input=q_enc)
    for i, p in enumerate(p_encs):
        left_out = self.recurrent_group(
            self.name + '_left_' + str(i),
            [layer.StaticInput(q_enc), layer.StaticInput(q_proj_left), p],
            reverse=False)
        right_out = self.recurrent_group(
            self.name + '_right_' + str(i),
            [layer.StaticInput(q_enc), layer.StaticInput(q_proj_right), p],
            reverse=True)
        match_seq = layer.concat(input=[left_out, right_out])
        match_seq_dropped = self.drop_out(match_seq, drop_rate=0.5)
        bi_match_seq = paddle.networks.bidirectional_lstm(
            input=match_seq_dropped,
            size=match_seq.size,
            fwd_mat_param_attr=Attr.Param('pn_f_enc_mat.w'),
            fwd_bias_param_attr=Attr.Param('pn_f_enc.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('pn_f_enc_inn.w'),
            bwd_mat_param_attr=Attr.Param('pn_b_enc_mat.w'),
            bwd_bias_param_attr=Attr.Param('pn_b_enc.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('pn_b_enc_inn.w'),
            return_seq=True)
        p_matches.append(bi_match_seq)

    all_docs = reduce(lambda x, y: layer.seq_concat(a=x, b=y), p_matches)
    all_docs_dropped = self.drop_out(all_docs, drop_rate=0.5)
    start = self.decode('start', all_docs_dropped)
    end = self.decode('end', all_docs_dropped)
    return start, end
def _build_regression_model(self, dnn, lr):
    merge_layer = layer.concat(input=[dnn, lr])
    self.output = layer.fc(
        input=merge_layer, size=1, act=paddle.activation.Sigmoid())
    if not self.is_infer:
        self.train_cost = paddle.layer.mse_cost(
            input=self.output, label=self.click)
    return self.output
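# Numpy sketch (toy sizes, random weights) of the regression head wired up
# above: a single sigmoid unit over the merged features, trained with mean
# squared error against the click label.
import numpy as np

def forward(merge_vec, w, b=0.0):
    return 1.0 / (1.0 + np.exp(-(merge_vec.dot(w) + b)))  # fc, size=1, Sigmoid

def mse(output, label):
    return (output - label) ** 2  # per-example mse_cost

x, w = np.random.randn(8), np.random.randn(8)
print(mse(forward(x, w), label=1.0))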
def test_aggregate_layer(self):
    pool = layer.pooling(input=pixel,
                         pooling_type=pooling.Avg(),
                         agg_level=layer.AggregateLevel.EACH_SEQUENCE)
    last_seq = layer.last_seq(input=pixel)
    first_seq = layer.first_seq(input=pixel)
    concat = layer.concat(input=[last_seq, first_seq])
    seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
    print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat)
def test_aggregate_layer(self):
    pool = layer.pooling(
        input=pixel,
        pooling_type=pooling.Avg(),
        agg_level=layer.AggregateLevel.TO_SEQUENCE)
    last_seq = layer.last_seq(input=pixel)
    first_seq = layer.first_seq(input=pixel)
    concat = layer.concat(input=[last_seq, first_seq])
    seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
    print layer.parse_network(
        [pool, last_seq, first_seq, concat, seq_concat])
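# Toy numpy sketch of what the aggregation layers in the two tests above
# compute on one sequence of shape (timesteps, features):
import numpy as np

seq = np.array([[1., 2.], [3., 4.], [5., 6.]])
pool = seq.mean(axis=0)                          # pooling with pooling.Avg()
last_seq = seq[-1]                               # layer.last_seq
first_seq = seq[0]                               # layer.first_seq
concat = np.concatenate([last_seq, first_seq])   # concat: feature axis
seq_concat = np.vstack([seq, seq])               # seq_concat: time axis
print(pool, last_seq, first_seq, concat, seq_concat.shape)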
def _combine_submodels_(self, dnn, lr):
    '''
    Combine the DNN and LR submodels.
    '''
    merge_layer = layer.concat(input=[dnn, lr])
    fc = layer.fc(
        input=merge_layer,
        size=1,
        name='output',
        # Use a sigmoid to approximate the CTR, a float value between 0 and 1.
        act=paddle.activation.Sigmoid())
    return fc
def _build_classification_model(self, dnn, lr):
    merge_layer = layer.concat(input=[dnn, lr])
    self.output = layer.fc(
        input=merge_layer,
        size=1,
        # Use a sigmoid to approximate the CTR, a float value between 0 and 1.
        act=paddle.activation.Sigmoid())
    if not self.is_infer:
        self.train_cost = paddle.layer.multi_binary_label_cross_entropy_cost(
            input=self.output, label=self.click)
    return self.output
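# Numpy sketch of the classification cost above: per-example binary
# cross-entropy between the sigmoid output and the 0/1 click label.
import numpy as np

def binary_cross_entropy(output, label):
    return -(label * np.log(output) + (1.0 - label) * np.log(1.0 - output))

print(binary_cross_entropy(0.8, 1.0))  # small cost: confident and correct
print(binary_cross_entropy(0.8, 0.0))  # large cost: confident and wrong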
def fusion_layer(self, input1, input2):
    """
    Combine input1 and input2 as
    concat(input1, input2, input1 - input2, input1 .* input2).
    """
    # PaddlePaddle v2 has no subtraction layer: negate input2 with
    # slope_intercept, then add it to input1 via addto.
    neg_input2 = layer.slope_intercept(input=input2,
                                       slope=-1.0,
                                       intercept=0.0)
    diff1 = layer.addto(input=[input1, neg_input2],
                        act=Act.Identity(),
                        bias_attr=False)
    # Elementwise product input1 .* input2.
    diff2 = layer.mixed(bias_attr=False,
                        input=layer.dotmul_operator(a=input1, b=input2))
    fused = layer.concat(input=[input1, input2, diff1, diff2])
    return fused
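# Numpy sketch of the fusion above, including the negate-then-add idiom that
# stands in for the missing subtraction layer:
import numpy as np

def fuse(a, b):
    neg_b = -1.0 * b + 0.0      # slope_intercept(slope=-1.0, intercept=0.0)
    diff1 = a + neg_b           # addto -> a - b
    diff2 = a * b               # dotmul_operator -> a .* b
    return np.concatenate([a, b, diff1, diff2])  # size 4 * dim

print(fuse(np.array([1., 2., 3.]), np.array([.5, 1., 1.5])))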
def network(self):
    """
    Implements the detail of the model.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    a_enc = self.get_enc(self.a_ids, type='q')
    q_proj_left = layer.fc(size=self.emb_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(self.name + '_left.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=self.emb_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(self.name + '_right.wq'),
                            input=q_enc)
    left_match = self.recurrent_group(
        self.name + '_left',
        [layer.StaticInput(q_enc), layer.StaticInput(q_proj_left), a_enc],
        reverse=False)
    right_match = self.recurrent_group(
        self.name + '_right',
        [layer.StaticInput(q_enc), layer.StaticInput(q_proj_right), a_enc],
        reverse=True)
    match_seq = layer.concat(input=[left_match, right_match])
    # Apply dropout through an identity projection inside a mixed layer.
    with layer.mixed(size=match_seq.size,
                     act=Act.Identity(),
                     layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                     bias_attr=False) as dropped:
        dropped += layer.identity_projection(match_seq)
    match_result = layer.pooling(input=dropped,
                                 pooling_type=paddle.pooling.Max())
    cls = layer.fc(input=match_result, act=Act.Softmax(), size=self.label_dim)
    return cls
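# Numpy sketch of the readout at the end of network() above: max-pool the
# match sequence over time, then classify with softmax (toy sizes, random
# weights; label_dim = 3 is illustrative).
import numpy as np

match_seq = np.random.randn(7, 6)            # (timesteps, match_seq.size)
match_result = match_seq.max(axis=0)         # pooling with pooling.Max()
w = np.random.randn(6, 3)
logits = match_result.dot(w)
cls = np.exp(logits) / np.exp(logits).sum()  # fc with Act.Softmax()
print(cls)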
def network(self, question, evidence, qe_comm, ee_comm, conf):
    """
    Implements the matching part of Match-LSTM.

    Returns:
        A LayerOutput object containing the bidirectional match sequence
        after dropout.
    """
    q_enc = self.get_enc(question, conf, type='q')
    p_enc = self.get_enc(evidence, conf, type='q')
    q_proj_left = layer.fc(size=conf.word_vec_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(
                               self.name + '_left_' + '.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=conf.word_vec_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(
                                self.name + '_right_' + '.wq'),
                            input=q_enc)
    # StaticInput defines a read-only memory: inputs wrapped in StaticInput
    # are not unrolled by recurrent_group, so every unrolled time step can
    # reference the whole input, which may be a non-sequence or a
    # single-level sequence.
    left_out = self.recurrent_group(
        self.name + '_left',
        [
            layer.StaticInput(q_enc), layer.StaticInput(q_proj_left), p_enc,
            qe_comm, ee_comm
        ],
        reverse=False)
    right_out = self.recurrent_group(
        self.name + '_right_',
        [
            layer.StaticInput(q_enc), layer.StaticInput(q_proj_right), p_enc,
            qe_comm, ee_comm
        ],
        reverse=True)
    match_seq = layer.concat(input=[left_out, right_out])
    return self.drop_out(match_seq, drop_rate=0.5)
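# Plain-Python sketch of reverse=True in the recurrent groups above: the same
# step function is run over the passage back-to-front, and the outputs are
# kept aligned with the original token positions.
def run(step, tokens, reverse=False):
    order = reversed(tokens) if reverse else tokens
    state, outs = '<s>', []
    for t in order:
        state = step(state, t)
        outs.append(state)
    return outs[::-1] if reverse else outs

step = lambda state, tok: state + '/' + tok
print(run(step, ['t0', 't1', 't2'], reverse=False))
print(run(step, ['t0', 't1', 't2'], reverse=True))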