def get_loss(self, start_prob, end_prob, start_label, end_label):
    """
    Computes the loss:
    $L(\theta) = -\log \big( P(\mathrm{start}) \cdot P(\mathrm{end} \mid \mathrm{start}) \big)$

    Returns:
        A LayerOutput object containing the loss.
    """
    probs = layer.seq_concat(a=start_prob, b=end_prob)
    labels = layer.seq_concat(a=start_label, b=end_label)
    log_probs = layer.mixed(
        size=probs.size,
        act=Act.Log(),
        bias_attr=False,
        input=paddle.layer.identity_projection(probs))
    neg_log_probs = layer.slope_intercept(
        input=log_probs,
        slope=-1,
        intercept=0)
    loss = paddle.layer.mixed(
        size=1,
        input=paddle.layer.dotmul_operator(a=neg_log_probs, b=labels))
    sum_val = paddle.layer.pooling(
        input=loss,
        pooling_type=paddle.pooling.Sum())
    cost = paddle.layer.sum_cost(input=sum_val)
    return cost
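# A minimal numpy sketch (illustrative only, not part of the model) of what
# get_loss computes: with one-hot start/end labels, multiplying the negative
# log probabilities elementwise by the labels and summing selects exactly
# -log P(start) - log P(end | start) = -log(P(start) * P(end | start)).
import numpy as np

def loss_sketch(start_prob, end_prob, start_label, end_label):
    probs = np.concatenate([start_prob, end_prob])
    labels = np.concatenate([start_label, end_label])
    neg_log_probs = -np.log(probs)
    # One-hot labels pick out the two relevant terms; their sum is the
    # negative log of the joint span probability.
    return np.sum(neg_log_probs * labels)

# Example: a 4-token paragraph whose true answer span is [1, 2].
start_prob = np.array([0.1, 0.6, 0.2, 0.1])
end_prob = np.array([0.1, 0.2, 0.5, 0.2])
start_label = np.array([0., 1., 0., 0.])
end_label = np.array([0., 0., 1., 0.])
assert np.isclose(loss_sketch(start_prob, end_prob, start_label, end_label),
                  -np.log(0.6 * 0.5))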
def network(self):
    """
    Implements the whole network.

    Returns:
        A tuple of LayerOutput objects containing the start and end
        probability distributions respectively.
    """
    self.check_and_create_data()
    self.create_shared_params()
    u = self._get_enc(self.q_ids, type='q')
    m1s = []
    m2s = []
    for p in self.p_ids:
        # type='q' routes the paragraph through the question encoder's
        # parameters, i.e. the contextual encoder is shared between
        # question and paragraph (as in BiDAF).
        h = self._get_enc(p, type='q')
        g = self._attention_flow(h, u)
        m1 = networks.bidirectional_lstm(
            fwd_mat_param_attr=Attr.Param('_f_m1_mat.w'),
            fwd_bias_param_attr=Attr.Param('_f_m1.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('_f_m1_inn.w'),
            bwd_mat_param_attr=Attr.Param('_b_m1_mat.w'),
            bwd_bias_param_attr=Attr.Param('_b_m1.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('_b_m1_inn.w'),
            input=g,
            size=self.emb_dim,
            return_seq=True)
        m1_dropped = self.drop_out(m1, drop_rate=0.)
        cat_g_m1 = layer.concat(input=[g, m1_dropped])
        m2 = networks.bidirectional_lstm(
            fwd_mat_param_attr=Attr.Param('_f_m2_mat.w'),
            fwd_bias_param_attr=Attr.Param('_f_m2.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('_f_m2_inn.w'),
            bwd_mat_param_attr=Attr.Param('_b_m2_mat.w'),
            bwd_bias_param_attr=Attr.Param('_b_m2.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('_b_m2_inn.w'),
            input=m1,
            size=self.emb_dim,
            return_seq=True)
        m2_dropped = self.drop_out(m2, drop_rate=0.)
        cat_g_m2 = layer.concat(input=[g, m2_dropped])
        m1s.append(cat_g_m1)
        m2s.append(cat_g_m2)
    all_m1 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m1s)
    all_m2 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m2s)
    start = self.decode('start', all_m1)
    end = self.decode('end', all_m2)
    return start, end
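# self._attention_flow is not shown above. For reference, a minimal numpy
# sketch of BiDAF-style attention flow, assuming a plain dot-product
# similarity (the paper uses a trainable similarity function); illustrative
# only, not the actual layer implementation.
import numpy as np

def attention_flow_sketch(h, u):
    # h: paragraph encoding, shape (T, d); u: question encoding, shape (J, d)
    s = h.dot(u.T)                                   # similarity matrix, (T, J)
    # Context-to-query: for each paragraph word, attend over question words.
    a = np.exp(s) / np.exp(s).sum(axis=1, keepdims=True)
    u_tilde = a.dot(u)                               # (T, d)
    # Query-to-context: attend over paragraph words via max column similarity.
    b = np.exp(s.max(axis=1)) / np.exp(s.max(axis=1)).sum()
    h_tilde = np.tile(b.dot(h), (h.shape[0], 1))     # (T, d)
    # G: per-word concatenation of h, u_tilde and elementwise products, (T, 4d).
    return np.concatenate([h, u_tilde, h * u_tilde, h * h_tilde], axis=1)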
def check_and_create_data(self):
    """
    Checks that the input data is legal and creates the data layers
    according to the input fields.
    """
    if self.is_infer:
        expected = ['q_ids', 'p_ids', 'para_length',
                    '[start_label, end_label, ...]']
        if len(self.inputs) < 2 * self.doc_num + 1:
            raise ValueError(r'''Input schema: expected vs given:
                    {} vs {}'''.format(expected, self.inputs))
    else:
        expected = ['q_ids', 'p_ids', 'para_length',
                    'start_label', 'end_label', '...']
        if len(self.inputs) < 4 * self.doc_num + 1:
            raise ValueError(r'''Input schema: expected vs given:
                    {} vs {}'''.format(expected, self.inputs))
        # Label layers are only created at training time; at inference the
        # inputs may stop after the paragraph lengths.
        self.start_labels = []
        for i in range(1 + 2 * self.doc_num, 1 + 3 * self.doc_num):
            self.start_labels.append(
                layer.data(name=self.inputs[i],
                           type=data_type.dense_vector_sequence(1)))
        self.start_label = reduce(
            lambda x, y: layer.seq_concat(a=x, b=y), self.start_labels)
        self.end_labels = []
        for i in range(1 + 3 * self.doc_num, 1 + 4 * self.doc_num):
            self.end_labels.append(
                layer.data(name=self.inputs[i],
                           type=data_type.dense_vector_sequence(1)))
        self.end_label = reduce(
            lambda x, y: layer.seq_concat(a=x, b=y), self.end_labels)
    self.q_ids = layer.data(
        name=self.inputs[0],
        type=data_type.integer_value_sequence(self.vocab_size))
    self.p_ids = []
    for i in range(1, 1 + self.doc_num):
        self.p_ids.append(
            layer.data(name=self.inputs[i],
                       type=data_type.integer_value_sequence(self.vocab_size)))
    self.para_lens = []
    for i in range(1 + self.doc_num, 1 + 2 * self.doc_num):
        self.para_lens.append(
            layer.data(name=self.inputs[i],
                       type=data_type.dense_vector_sequence(1)))
    self.para_len = reduce(
        lambda x, y: layer.seq_concat(a=x, b=y), self.para_lens)
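# The input ordering implied by the index ranges above, illustrated for a
# hypothetical doc_num = 2 (the field names here are placeholders): index 0
# is the question, then one paragraph per document, then one length field
# per document, then, at training time only, per-document start and end
# labels.
inputs_for_two_docs = [
    'q_ids',                           # self.inputs[0]
    'p_ids_0', 'p_ids_1',              # indices 1 .. doc_num
    'para_length_0', 'para_length_1',  # indices 1+doc_num .. 2*doc_num
    'start_label_0', 'start_label_1',  # indices 1+2*doc_num .. 3*doc_num
    'end_label_0', 'end_label_1',      # indices 1+3*doc_num .. 4*doc_num
]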
def network(self):
    """
    Implements the whole network of Match-LSTM.

    Returns:
        A tuple of LayerOutput objects containing the start and end
        probability distributions respectively.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    p_encs = []
    p_matches = []
    for p in self.p_ids:
        p_encs.append(self.get_enc(p, type='p'))

    q_proj_left = layer.fc(
        size=self.emb_dim * 2,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '_left_' + '.wq'),
        input=q_enc)
    q_proj_right = layer.fc(
        size=self.emb_dim * 2,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '_right_' + '.wq'),
        input=q_enc)
    for i, p in enumerate(p_encs):
        left_out = self.recurrent_group(
            self.name + '_left_' + str(i),
            [layer.StaticInput(q_enc), layer.StaticInput(q_proj_left), p],
            reverse=False)
        right_out = self.recurrent_group(
            self.name + '_right_' + str(i),
            [layer.StaticInput(q_enc), layer.StaticInput(q_proj_right), p],
            reverse=True)
        match_seq = layer.concat(input=[left_out, right_out])
        match_seq_dropped = self.drop_out(match_seq, drop_rate=0.5)
        bi_match_seq = paddle.networks.bidirectional_lstm(
            input=match_seq_dropped,
            size=match_seq.size,
            fwd_mat_param_attr=Attr.Param('pn_f_enc_mat.w'),
            fwd_bias_param_attr=Attr.Param('pn_f_enc.bias', initial_std=0.),
            fwd_inner_param_attr=Attr.Param('pn_f_enc_inn.w'),
            bwd_mat_param_attr=Attr.Param('pn_b_enc_mat.w'),
            bwd_bias_param_attr=Attr.Param('pn_b_enc.bias', initial_std=0.),
            bwd_inner_param_attr=Attr.Param('pn_b_enc_inn.w'),
            return_seq=True)
        p_matches.append(bi_match_seq)
    all_docs = reduce(lambda x, y: layer.seq_concat(a=x, b=y), p_matches)
    all_docs_dropped = self.drop_out(all_docs, drop_rate=0.5)
    start = self.decode('start', all_docs_dropped)
    end = self.decode('end', all_docs_dropped)
    return start, end
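# self.recurrent_group is not shown above. As a rough numpy sketch of the
# Match-LSTM step it runs per paragraph token (assumptions: dot-product
# attention between the recurrent state and the projected question; the real
# layer uses trainable attention parameters, and lstm_step here stands in
# for a hypothetical single-step LSTM function).
import numpy as np

def match_step_sketch(p_t, q_enc, q_proj, state, lstm_step):
    # p_t: one paragraph token encoding, (d,)
    # q_enc, q_proj: question encoding and its projection, (J, d) / (J, k)
    # state: previous match-LSTM state, (k,)
    scores = q_proj.dot(state)                      # (J,)
    alpha = np.exp(scores) / np.exp(scores).sum()   # attention over question
    q_attended = alpha.dot(q_enc)                   # (d,)
    # The LSTM consumes the token paired with its attended question summary.
    return lstm_step(np.concatenate([p_t, q_attended]), state)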
def test_aggregate_layer(self):
    pool = layer.pooling(
        input=pixel,
        pooling_type=pooling.Avg(),
        agg_level=layer.AggregateLevel.TO_SEQUENCE)
    last_seq = layer.last_seq(input=pixel)
    first_seq = layer.first_seq(input=pixel)
    concat = layer.concat(input=[last_seq, first_seq])
    seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
    print layer.parse_network(
        [pool, last_seq, first_seq, concat, seq_concat])
def infer(self):
    """
    The inference interface.

    Returns:
        start_end: A sequence of concatenated start and end probabilities.
        para_len: A sequence of the lengths of every paragraph, which is
                  used to parse the inference output.
    """
    start, end = self.network()
    start_end = layer.seq_concat(name='start_end', a=start, b=end)
    return start_end, self.para_len
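# A minimal post-processing sketch (illustrative; the flat output layout is
# an assumption based on the seq_concat calls above): start_end concatenates
# all start probabilities followed by all end probabilities, and para_lens
# recovers the per-paragraph boundaries so a span can be read per paragraph.
import numpy as np

def split_start_end(start_end, para_lens):
    total = int(sum(para_lens))
    start_probs, end_probs = start_end[:total], start_end[total:]
    spans, offset = [], 0
    for n in para_lens:
        n = int(n)
        s = start_probs[offset:offset + n]
        e = end_probs[offset:offset + n]
        # Most probable start/end position within this paragraph.
        spans.append((int(np.argmax(s)), int(np.argmax(e))))
        offset += n
    return spans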