def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    """Build an attention sub-network over ``to_apply`` and return its result.

    The layers are created in this order:

    1. A linear ``layer.mixed`` of size ``cur_token.size`` that accumulates
       one full-matrix projection of ``cur_token`` and one of ``prev``.
    2. ``layer.expand`` of that mixed layer, using ``to_apply`` as the
       ``expand_as`` reference.
    3. ``layer.addto`` of the expanded layer and ``to_apply_proj`` with a
       Tanh activation and no bias.
    4. A size-1 ``layer.fc`` with ``Act.SequenceSoftmax()`` producing the
       attention weights.
    5. ``layer.scaling`` of ``to_apply`` by those weights, followed by
       ``layer.pooling`` with ``paddle.pooling.Sum()``.

    Args:
        direct: Prefix concatenated with '.bp', '.wp', '.wr', '.w' and '.b'
            to form the first argument of each ``Attr.Param``.
        cur_token: Layer projected into the mixed layer; its ``size``
            attribute sets the mixed layer's size.
        prev: Second layer projected into the mixed layer.
            NOTE(review): presumably the previous recurrent state --
            confirm against the caller.
        to_apply: Layer that is weighted by the attention scores and then
            sum-pooled.
        to_apply_proj: Layer added to the expanded mixed layer before the
            Tanh. NOTE(review): presumably a precomputed projection of
            ``to_apply`` -- confirm against the caller.

    Returns:
        The layer returned by ``layer.pooling`` over the scaled ``to_apply``.
    """

    def named_param(suffix, **options):
        # Every parameter attribute of this sub-network is built from the
        # same ``direct`` prefix plus a fixed suffix.
        return Attr.Param(direct + suffix, **options)

    # Accumulate the two projections into one linear mixed layer.
    with layer.mixed(
            size=cur_token.size,
            bias_attr=named_param('.bp', initial_std=0.),
            act=Act.Linear()) as query:
        query += layer.full_matrix_projection(
            input=cur_token, param_attr=named_param('.wp'))
        query += layer.full_matrix_projection(
            input=prev, param_attr=named_param('.wr'))

    # Expand the mixed layer against ``to_apply`` so it can be added to
    # ``to_apply_proj``.
    query_expanded = layer.expand(input=query, expand_as=to_apply)
    hidden = layer.addto(
        input=[query_expanded, to_apply_proj],
        act=Act.Tanh(),
        bias_attr=False)

    # Size-1 fully connected layer with a sequence softmax activation.
    weights = layer.fc(
        input=hidden,
        param_attr=named_param('.w'),
        bias_attr=named_param('.b', initial_std=0.),
        act=Act.SequenceSoftmax(),
        size=1)

    # Scale ``to_apply`` by the weights, then sum-pool the result.
    weighted = layer.scaling(input=to_apply, weight=weights)
    return layer.pooling(input=weighted, pooling_type=paddle.pooling.Sum())
weight = layer.data(name='weight', type=data_type.dense_vector(1)) combine_weight = layer.data(name='weight_combine', type=data_type.dense_vector(10)) score = layer.data(name='score', type=data_type.dense_vector(1)) hidden = layer.fc(input=pixel, size=100, act=activation.Sigmoid(), param_attr=attr.Param(name='hidden')) inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) conv = layer.img_conv(input=pixel, filter_size=1, filter_size_y=1, num_channels=8, num_filters=16, act=activation.Linear()) class ImageLayerTest(unittest.TestCase): def test_conv_layer(self): conv_shift = layer.conv_shift(a=pixel, b=score) print layer.parse_network(conv, conv_shift) def test_pooling_layer(self): maxpool = layer.img_pool(input=conv, pool_size=2, num_channels=16, padding=1, pool_type=pooling.Max()) spp = layer.spp(input=conv, pyramid_height=2,