def _u_step(self, h_cur, u):
    # Score the current step against every step of u, then normalize the
    # scores over the whole sequence with a sequence-level softmax.
    s = self._step_basic(h_cur, u)
    with layer.mixed(
            size=1, bias_attr=False, act=Act.SequenceSoftmax()) as h_weights:
        h_weights += layer.identity_projection(s)
    # Weight u by the attention scores and sum over the time dimension.
    applied_weights = layer.scaling(input=u, weight=h_weights)
    u_ctx = layer.pooling(
        input=applied_weights, pooling_type=paddle.pooling.Sum())
    return u_ctx
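A step function like this is meant to be driven once per time step by layer.recurrent_group, with the attended sequence wrapped in a layer.StaticInput so that every step can see all of u; _attention_flow below wires it up exactly that way. A minimal sketch of that wiring (variable names are illustrative):

# Illustrative wiring only: h drives the recurrence, u is held static.
u_expr = layer.recurrent_group(
    input=[h, layer.StaticInput(u)], step=self._u_step, reverse=False)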
def test_projection(self):
    input = layer.data(name='data', type=data_type.dense_vector(784))
    word = layer.data(
        name='word', type=data_type.integer_value_sequence(10000))
    fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
    fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
    mixed0 = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=fc0),
            layer.full_matrix_projection(input=fc1)
        ])
    with layer.mixed(size=200) as mixed1:
        mixed1 += layer.full_matrix_projection(input=fc0)
        mixed1 += layer.identity_projection(input=fc1)

    table = layer.table_projection(input=word)
    emb0 = layer.mixed(size=512, input=table)
    with layer.mixed(size=512) as emb1:
        emb1 += table

    scale = layer.scaling_projection(input=fc0)
    scale0 = layer.mixed(size=100, input=scale)
    with layer.mixed(size=100) as scale1:
        scale1 += scale

    dotmul = layer.dotmul_projection(input=fc0)
    dotmul0 = layer.mixed(size=100, input=dotmul)
    with layer.mixed(size=100) as dotmul1:
        dotmul1 += dotmul

    # context_projection with context_len=5 over a size-100 input yields a
    # size-500 output, so the receiving mixed layers must be size 500.
    context = layer.context_projection(input=fc0, context_len=5)
    context0 = layer.mixed(size=500, input=context)
    with layer.mixed(size=500) as context1:
        context1 += context

    conv = layer.conv_projection(
        input=input,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv0 = layer.mixed(input=conv, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv1:
        conv1 += conv

    print layer.parse_network(mixed0)
    print layer.parse_network(mixed1)
    print layer.parse_network(emb0)
    print layer.parse_network(emb1)
    print layer.parse_network(scale0)
    print layer.parse_network(scale1)
    print layer.parse_network(dotmul0)
    print layer.parse_network(dotmul1)
    print layer.parse_network(conv0)
    print layer.parse_network(conv1)
def _attention_flow(self, h, u):
    # Per-step scores of h against u, normalized over the sequence.
    bs = layer.recurrent_group(
        input=[h, layer.StaticInput(u)], step=self._h_step, reverse=False)
    b_weights = layer.mixed(
        act=Act.SequenceSoftmax(),
        bias_attr=False,
        input=layer.identity_projection(bs))
    # Attended summary of h, broadcast back to every step of h.
    h_step_scaled = layer.scaling(input=h, weight=b_weights)
    h_step = layer.pooling(
        input=h_step_scaled, pooling_type=paddle.pooling.Sum())
    h_expr = layer.expand(input=h_step, expand_as=h)
    # Per-step attended context over u (see _u_step above).
    u_expr = layer.recurrent_group(
        input=[h, layer.StaticInput(u)], step=self._u_step, reverse=False)
    g = self._beta(h, u_expr, h_expr)
    return g
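The _beta helper is not shown in this section. In BiDAF-style attention flow it typically forms the query-aware representation G = [h; u~; h * u~; h * h~] by concatenating the inputs with their elementwise products. A sketch under that assumption, not necessarily the original implementation:

def _beta(self, h, u_expr, h_expr):
    # Assumed BiDAF-style combination: [h; u~; h * u~; h * h~].
    with layer.mixed(
            size=h.size * 4, act=Act.Identity(), bias_attr=False) as g:
        g += layer.identity_projection(h)
        g += layer.identity_projection(u_expr)
        g += layer.dotmul_operator(a=h, b=u_expr, scale=1.0)
        g += layer.dotmul_operator(a=h, b=h_expr, scale=1.0)
    return g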
def drop_out(self, input, drop_rate=0.5):
    """
    Implements dropout.

    Args:
        input: the LayerOutput to apply dropout to.
        drop_rate: the dropout rate.

    Returns:
        The layer output after applying dropout.
    """
    with layer.mixed(
            layer_attr=Attr.ExtraLayerAttribute(drop_rate=drop_rate),
            bias_attr=False) as dropped:
        dropped += layer.identity_projection(input)
    return dropped
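Here dropout is attached via ExtraLayerAttribute on a parameter-free pass-through mixed layer, which applies it to an arbitrary input. A brief usage sketch, assuming enc is an existing LayerOutput (the name is illustrative):

# Illustrative only: regularize an encoder output with 30% dropout.
enc = self.drop_out(enc, drop_rate=0.3)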
def network(self):
    """
    Implements the details of the model.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    a_enc = self.get_enc(self.a_ids, type='q')

    # Project the question encoding once per direction; the projections
    # are reused as static inputs inside the recurrent groups.
    q_proj_left = layer.fc(
        size=self.emb_dim * 2,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '_left.wq'),
        input=q_enc)
    q_proj_right = layer.fc(
        size=self.emb_dim * 2,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '_right.wq'),
        input=q_enc)
    left_match = self.recurrent_group(
        self.name + '_left',
        [layer.StaticInput(q_enc),
         layer.StaticInput(q_proj_left), a_enc],
        reverse=False)
    right_match = self.recurrent_group(
        self.name + '_right',
        [layer.StaticInput(q_enc),
         layer.StaticInput(q_proj_right), a_enc],
        reverse=True)

    # Concatenate both directions, apply dropout, max-pool over time,
    # and classify.
    match_seq = layer.concat(input=[left_match, right_match])
    with layer.mixed(
            size=match_seq.size,
            act=Act.Identity(),
            layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
            bias_attr=False) as dropped:
        dropped += layer.identity_projection(match_seq)
    match_result = layer.pooling(
        input=dropped, pooling_type=paddle.pooling.Max())
    cls = layer.fc(
        input=match_result, act=Act.Softmax(), size=self.label_dim)
    return cls
def math_op(input, act=pd.activation.Linear(), op='dot', size=0):
    if not isinstance(input, list):
        input = [input]
    if len(input) == 1:
        # Unary operation: pass the input through with the given activation.
        result = pd.mixed(
            input=[pd.identity_projection(input=input[0])], act=act)
    elif len(input) == 2:
        # Binary operation: only the elementwise product ('dot') is supported.
        if op != 'dot':
            raise ValueError('unsupported binary math op: %s' % op)
        result = pd.mixed(
            size=size,
            input=pd.dotmul_operator(a=input[0], b=input[1], scale=1.0),
            act=act)
    else:
        raise ValueError('math op with more than two inputs is not supported')
    return result
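A brief usage sketch, assuming pd is the paddle.v2 module and vec_a, vec_b are equally sized LayerOutputs (the names are illustrative):

# Unary: apply tanh elementwise to a single layer output.
tanh_a = math_op(vec_a, act=pd.activation.Tanh())
# Binary: elementwise product of two equally sized vectors.
prod_ab = math_op([vec_a, vec_b], op='dot', size=vec_a.size)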