Example #1
    def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
        # Project the current token and the previous state into a single
        # query vector (linear activation, per-direction parameters).
        with layer.mixed(size=cur_token.size,
                         bias_attr=Attr.Param(direct + '.bp',
                                              initial_std=0.),
                         act=Act.Linear()) as proj:
            proj += layer.full_matrix_projection(
                    input=cur_token,
                    param_attr=Attr.Param(direct + '.wp'))
            proj += layer.full_matrix_projection(
                    input=prev,
                    param_attr=Attr.Param(direct + '.wr'))

        # Broadcast the query over every step of the sequence to attend to
        # and combine it with the pre-projected sequence through a tanh.
        expanded = layer.expand(input=proj, expand_as=to_apply)
        att_context = layer.addto(input=[expanded, to_apply_proj],
                                  act=Act.Tanh(),
                                  bias_attr=False)

        # One scalar score per step, normalized over the sequence.
        att_weights = layer.fc(input=att_context,
                               param_attr=Attr.Param(direct + '.w'),
                               bias_attr=Attr.Param(direct + '.b',
                                                    initial_std=0.),
                               act=Act.SequenceSoftmax(),
                               size=1)
        # Weighted sum: scale each step by its attention weight, then
        # sum-pool over the sequence dimension.
        scaled = layer.scaling(input=to_apply, weight=att_weights)
        applied = layer.pooling(input=scaled,
                                pooling_type=paddle.pooling.Sum())
        return applied
Example #2
 def _u_step(self, h_cur, u):
     s = self._step_basic(h_cur, u)
     # One normalized attention weight per step of u.
     with layer.mixed(size=1, bias_attr=False,
                      act=Act.SequenceSoftmax()) as h_weights:
         h_weights += layer.identity_projection(s)
     # Weighted sum of u: scale each step by its weight and sum-pool.
     applied_weights = layer.scaling(input=u, weight=h_weights)
     u_ctx = layer.pooling(input=applied_weights,
                           pooling_type=paddle.pooling.Sum())
     return u_ctx
Example #3
def sum_weighted_loss(loss, weight, size=1):
    """Return the normalized weighted loss.

    `loss` has shape batch_size x image_size and `weight` holds the
    corresponding per-element weights; the result is
    sum(loss * weight) / sum(weight).
    """
    # Element-wise product loss * weight.
    weighted_loss = pd.mixed(
        size=size, input=[pd.dotmul_operator(a=loss, b=weight, scale=1.0)])
    # Normalization factor 1 / sum(weight).
    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac, act=pd.activation.Inv())
    # Scale the weighted loss by the factor and sum over all elements.
    weighted_loss = pd.scaling(input=weighted_loss, weight=weight_fac)
    weighted_loss = pd.sum_cost(input=weighted_loss)

    return weighted_loss
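For intuition, here is a minimal NumPy sketch of the normalization described in the docstring. It is purely illustrative, not part of the Paddle graph, and the toy values are made up:

import numpy as np

# Toy per-pixel losses and a 0/1 validity mask of the same shape.
loss = np.array([[0.5, 1.0], [2.0, 0.0]])
weight = np.array([[1.0, 0.0], [1.0, 1.0]])

# sum(loss * weight) / sum(weight) = (0.5 + 2.0 + 0.0) / 3 ≈ 0.833
normalized = (loss * weight).sum() / weight.sum()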
Example #4
 def test_math_layer(self):
     addto = layer.addto(input=[pixel, pixel])
     linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
     interpolation = layer.interpolation(
         input=[hidden, hidden], weight=score)
     bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
     power = layer.power(input=pixel, weight=score)
     scaling = layer.scaling(input=pixel, weight=score)
     slope = layer.slope_intercept(input=pixel)
     tensor = layer.tensor(a=pixel, b=pixel, size=1000)
     cos_sim = layer.cos_sim(a=pixel, b=pixel)
     trans = layer.trans(input=tensor)
     print layer.parse_network(addto, linear_comb, interpolation, power,
                               scaling, slope, tensor, cos_sim, trans)
Example #5
def ns_ele_l2_cost(input,
                   label,
                   weight,
                   height,
                   width,
                   num_channel=None,
                   interp='nearest'):
    assert interp in image_resize_func.keys()
    # make sure the input, label and weight all have the same size
    input = pd.bilinear_interp(input=input,
                               out_size_x=width,
                               out_size_y=height)
    label = image_resize_func[interp](input=label,
                                      out_size_x=width,
                                      out_size_y=height)
    weight = image_resize_func[interp](input=weight,
                                       out_size_x=width,
                                       out_size_y=height)

    # reshape the original layers:
    # input has shape c x h x w; transpose it to h x w x c
    input_ts = pd.transpose(input=input,
                            trans_order=[1, 2, 0],
                            height=height,
                            width=width)
    input_rs = pd.resize(input=input_ts, size=num_channel, height=1, width=1)

    label_ts = pd.transpose(input=label,
                            trans_order=[1, 2, 0],
                            height=height,
                            width=width)
    label_rs = pd.resize(input=label_ts, size=num_channel, height=1, width=1)
    weight_rs = pd.resize(input=weight, size=1, height=1, width=1)

    cost_rs = pd.mse_cost(input=input_rs, label=label_rs)
    sqrt_l2_cost = util_layers.math_op(input=cost_rs, act=pd.activation.Sqrt())
    sqrt_l2_cost = pd.mixed(
        size=1,
        input=[pd.dotmul_operator(a=sqrt_l2_cost, b=weight_rs, scale=1.0)])
    sqrt_l2_cost = pd.resize(input=sqrt_l2_cost,
                             size=height * width,
                             height=height,
                             width=width)

    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac, act=pd.activation.Inv())
    sqrt_l2_cost = pd.scaling(input=sqrt_l2_cost, weight=weight_fac)
    cost = pd.sum_cost(input=sqrt_l2_cost)

    return cost
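As a rough illustration of the layout change that the transpose/resize pair above performs, a small NumPy sketch with made-up toy dimensions (the Paddle layers operate on flattened layer outputs, not NumPy arrays):

import numpy as np

c, h, w = 3, 2, 2                          # toy channel/height/width
x = np.arange(c * h * w).reshape(c, h, w)  # input laid out as c x h x w

# pd.transpose with trans_order=[1, 2, 0] yields h x w x c.
x_hwc = x.transpose(1, 2, 0)

# pd.resize(size=num_channel) then treats every pixel as one row of
# num_channel values, i.e. an (h * w) x c matrix.
x_rows = x_hwc.reshape(h * w, c)           # shape (4, 3)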
Example #6
 def _attention_flow(self, h, u):
     # Per-step scores produced by the recurrent step function (u is
     # passed in as a static, non-recurrent input).
     bs = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                                step=self._h_step,
                                reverse=False)
     # Normalize the scores with a softmax over the sequence.
     b_weights = layer.mixed(act=Act.SequenceSoftmax(),
                             bias_attr=False,
                             input=layer.identity_projection(bs))
     # Weighted sum of h: scale each step by its weight, then sum-pool.
     h_step_scaled = layer.scaling(input=h, weight=b_weights)
     h_step = layer.pooling(input=h_step_scaled,
                            pooling_type=paddle.pooling.Sum())
     # Broadcast the pooled vector back to the sequence length of h.
     h_expr = layer.expand(input=h_step, expand_as=h)
     u_expr = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                                    step=self._u_step,
                                    reverse=False)
     g = self._beta(h, u_expr, h_expr)
     return g
Example #7
 def test_math_layer(self):
     addto = layer.addto(input=[pixel, pixel])
     linear_comb = layer.linear_comb(
         weights=combine_weight, vectors=hidden, size=10)
     interpolation = layer.interpolation(
         input=[hidden, hidden], weight=score)
     bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
     power = layer.power(input=pixel, weight=score)
     scaling = layer.scaling(input=pixel, weight=score)
     slope = layer.slope_intercept(input=pixel)
     tensor = layer.tensor(a=pixel, b=pixel, size=1000)
     cos_sim = layer.cos_sim(a=pixel, b=pixel)
     trans = layer.trans(input=tensor)
     print layer.parse_network([
         addto, linear_comb, interpolation, power, scaling, slope, tensor,
         cos_sim, trans
     ])
Example #8
def ele_norm_cost(input,
                  label,
                  weight,
                  height=None,
                  width=None,
                  num_channel=None,
                  cost_type='l1'):
    if height > 1 and width > 1:
        input = pd.bilinear_interp(input=input,
                                   out_size_x=width,
                                   out_size_y=height)
        label = pd.bilinear_interp(input=label,
                                   out_size_x=width,
                                   out_size_y=height)
        if weight:
            weight = pd.nearest_interp(input=weight,
                                       out_size_x=width,
                                       out_size_y=height)

    size = height * width * num_channel
    if weight:
        # Apply the per-element weights to both input and label before
        # computing the cost, then normalize by sum(weight).
        input = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=input, b=weight, scale=1.0)])
        label = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=label, b=weight, scale=1.0)])
        cost = cost_func[cost_type](input=input, label=label)
        fac = pd.sum_cost(input=weight)
        fac = util_layers.math_op(input=fac, act=pd.activation.Inv())
        cost = pd.scaling(input=cost, weight=fac)
        cost = pd.sum_cost(input=cost)
    else:
        # No weights given: average the cost over all pixels (1 / (h * w)).
        cost = cost_func[cost_type](input=input, label=label)
        fac = 1.0 / float(height * width)
        cost = pd.slope_intercept(input=cost, slope=fac, intercept=0.0)
        cost = pd.sum_cost(input=cost)

    return cost