# Assumed imports for these snippets (PaddlePaddle v2 API):
# import paddle.v2 as paddle
# import paddle.v2.layer as layer
# import paddle.v2.attr as Attr
# import paddle.v2.activation as Act

def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    # Additive attention over the sequence `to_apply`: project the current
    # token and the previous state, broadcast the projection over the
    # sequence, score each step, and return the weighted sum.
    with layer.mixed(
            size=cur_token.size,
            bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
            act=Act.Linear()) as proj:
        proj += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        proj += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))
    expanded = layer.expand(input=proj, expand_as=to_apply)
    att_context = layer.addto(
        input=[expanded, to_apply_proj], act=Act.Tanh(), bias_attr=False)
    # One score per time step, normalized with a sequence-level softmax.
    att_weights = layer.fc(
        input=att_context,
        param_attr=Attr.Param(direct + '.w'),
        bias_attr=Attr.Param(direct + '.b', initial_std=0.),
        act=Act.SequenceSoftmax(),
        size=1)
    scaled = layer.scaling(input=to_apply, weight=att_weights)
    applied = layer.pooling(
        input=scaled, pooling_type=paddle.pooling.Sum())
    return applied

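# Illustrative sketch (not part of the network code): the dense computation
# that the `_attention` graph above expresses, written out in NumPy. All names
# and shapes here are hypothetical; `to_apply_proj` corresponds to the
# precomputed `to_apply @ Wa` term.
import numpy as np

def additive_attention_sketch(cur_token, prev, to_apply, Wp, Wr, bp, Wa, w, b):
    # proj = Wp * cur_token + Wr * prev + bp, later broadcast over the sequence
    proj = cur_token @ Wp + prev @ Wr + bp               # (dim,)
    context = np.tanh(proj[None, :] + to_apply @ Wa)     # (seq_len, dim)
    scores = context @ w + b                             # (seq_len,)
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                             # sequence softmax
    return weights @ to_apply                            # attended vector
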
def _step_basic(self, h_cur, u):
    # Score one hidden state against every element of u with a linear layer
    # over the concatenation [h; u; h * u] (BiDAF-style similarity).
    expanded_h = layer.expand(input=h_cur, expand_as=u)
    hu = layer.concat(input=[expanded_h, u])
    with layer.mixed(bias_attr=False) as dot_hu:
        dot_hu += layer.dotmul_operator(a=expanded_h, b=u)
    cat_all = layer.concat(input=[hu, dot_hu])
    s = layer.fc(
        size=1,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '.ws'),
        input=cat_all)
    return s

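# Illustrative sketch (not part of the network code): a NumPy version of the
# similarity that `_step_basic` builds, one score per u_j. Names and shapes
# are hypothetical.
import numpy as np

def similarity_scores_sketch(h_cur, u, ws):
    # h_cur: (dim,), u: (seq_len, dim), ws: (3 * dim,)
    expanded_h = np.broadcast_to(h_cur, u.shape)          # expand h over u
    cat_all = np.concatenate(
        [expanded_h, u, expanded_h * u], axis=1)          # (seq_len, 3 * dim)
    return cat_all @ ws                                   # one score per u_j
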
def test_reshape_layer(self):
    # conv, weight, and pixel are assumed to be layers defined elsewhere in
    # the test module.
    block_expand = layer.block_expand(
        input=conv, num_channels=4, stride_x=1, block_x=1)
    expand = layer.expand(
        input=weight,
        expand_as=pixel,
        expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE)
    repeat = layer.repeat(input=pixel, num_repeats=4)
    reshape = layer.seq_reshape(input=pixel, reshape_size=4)
    rotate = layer.rotate(input=pixel, height=16, width=49)
    print(layer.parse_network(
        [block_expand, expand, repeat, reshape, rotate]))

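# Illustrative sketch (not part of the test): rough NumPy analogues of two of
# the layers exercised above, under the assumption that a sequence is stored
# as a (seq_len, dim) array. This is a conceptual aid, not the PaddlePaddle
# implementation.
import numpy as np

def expand_sketch(weight, pixel_seq):
    # layer.expand with FROM_NO_SEQUENCE: copy one vector to every time step
    # of the reference sequence.
    return np.tile(weight, (pixel_seq.shape[0], 1))

def seq_reshape_sketch(pixel_seq, reshape_size):
    # layer.seq_reshape: keep the data, change the per-step dimension
    # (the sequence length changes accordingly).
    return pixel_seq.reshape(-1, reshape_size)
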
def _attention_flow(self, h, u):
    # Query-to-context attention (BiDAF-style): score each h_t via
    # self._h_step, softmax over the sequence, and pool h into one vector.
    bs = layer.recurrent_group(
        input=[h, layer.StaticInput(u)], step=self._h_step, reverse=False)
    b_weights = layer.mixed(
        act=Act.SequenceSoftmax(),
        bias_attr=False,
        input=layer.identity_projection(bs))
    h_step_scaled = layer.scaling(input=h, weight=b_weights)
    h_step = layer.pooling(
        input=h_step_scaled, pooling_type=paddle.pooling.Sum())
    h_expr = layer.expand(input=h_step, expand_as=h)
    # Context-to-query attention: attend over u for every h_t via self._u_step.
    u_expr = layer.recurrent_group(
        input=[h, layer.StaticInput(u)], step=self._u_step, reverse=False)
    # Combine h with both attended representations.
    g = self._beta(h, u_expr, h_expr)
    return g
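
# Illustrative sketch (not part of the network code): assuming _h_step, _u_step
# and _beta implement the standard BiDAF attention flow on top of the
# [h; u; h * u] similarity from _step_basic, the dense computation is roughly
# the following. h: context encoding (T, d), u: question encoding (J, d),
# ws: (3 * d,). All names are hypothetical.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def attention_flow_sketch(h, u, ws):
    T, _ = h.shape
    J = u.shape[0]
    # Similarity matrix S[t, j] = ws . [h_t; u_j; h_t * u_j]
    S = np.array([[np.concatenate([h[t], u[j], h[t] * u[j]]) @ ws
                   for j in range(J)] for t in range(T)])
    u_tilde = softmax(S, axis=1) @ u          # context-to-query, (T, d)
    b = softmax(S.max(axis=1))                # query-to-context weights, (T,)
    h_tilde = np.tile(b @ h, (T, 1))          # pooled context, expanded to (T, d)
    # _beta is assumed to concatenate [h; u_tilde; h * u_tilde; h * h_tilde].
    return np.concatenate([h, u_tilde, h * u_tilde, h * h_tilde], axis=1)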