Example #1
 def _beta(self, h, u_expr, h_expr):
     with layer.mixed(bias_attr=False) as dot_h_u_expr:
         dot_h_u_expr += layer.dotmul_operator(a=h, b=u_expr)
     with layer.mixed(bias_attr=False) as dot_h_h_expr:
         dot_h_h_expr += layer.dotmul_operator(a=h, b=h_expr)
     cat_all = layer.concat(input=[h, u_expr, dot_h_u_expr, dot_h_h_expr])
     return cat_all
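The pattern used in `_beta` above, a bias-free `layer.mixed` wrapping a `layer.dotmul_operator`, is how the v2 layer API expresses an elementwise product of two equally sized layers. A minimal, standalone sketch of just that pattern; the imports and aliases follow the test examples below and the data layer names are hypothetical, not part of the original snippet:

import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

# two dense inputs of equal width (names are hypothetical)
a = layer.data(name='vec_a', type=data_type.dense_vector(100))
b = layer.data(name='vec_b', type=data_type.dense_vector(100))

# elementwise product a .* b, built through a bias-free mixed layer
with layer.mixed(bias_attr=False) as prod:
    prod += layer.dotmul_operator(a=a, b=b)

print layer.parse_network(prod)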
Example #2
    def test_operator(self):
        ipt0 = layer.data(name='data1', type=data_type.dense_vector(784))
        ipt1 = layer.data(name='word1', type=data_type.dense_vector(128))
        fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())

        dotmul_op = layer.dotmul_operator(a=fc0, b=fc1)
        dotmul0 = layer.mixed(input=dotmul_op)
        with layer.mixed() as dotmul1:
            dotmul1 += dotmul_op

        conv = layer.conv_operator(
            img=ipt0,
            filter=ipt1,
            filter_size=1,
            num_channels=1,
            num_filters=128,
            stride=1,
            padding=0)
        conv0 = layer.mixed(input=conv)
        with layer.mixed() as conv1:
            conv1 += conv

        print layer.parse_network(dotmul0)
        print layer.parse_network(dotmul1)
        print layer.parse_network(conv0)
        print layer.parse_network(conv1)
Example #3
    def test_operator(self):
        ipt0 = layer.data(name='data', type=data_type.dense_vector(784))
        ipt1 = layer.data(name='word', type=data_type.dense_vector(128))
        fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())

        dotmul_op = layer.dotmul_operator(a=fc0, b=fc1)
        dotmul0 = layer.mixed(input=dotmul_op)
        with layer.mixed() as dotmul1:
            dotmul1 += dotmul_op

        conv = layer.conv_operator(img=ipt0,
                                   filter=ipt1,
                                   filter_size=1,
                                   num_channels=1,
                                   num_filters=128,
                                   stride=1,
                                   padding=0)
        conv0 = layer.mixed(input=conv)
        with layer.mixed() as conv1:
            conv1 += conv

        print layer.parse_network(dotmul0)
        print layer.parse_network(dotmul1)
        print layer.parse_network(conv0)
        print layer.parse_network(conv1)
Example #4
    def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
        with layer.mixed(size=cur_token.size,
                         bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                         act=Act.Linear()) as proj:
            proj += layer.full_matrix_projection(input=cur_token,
                                                 param_attr=Attr.Param(direct +
                                                                       '.wp'))
            proj += layer.full_matrix_projection(input=prev,
                                                 param_attr=Attr.Param(direct +
                                                                       '.wr'))

        expanded = layer.expand(input=proj, expand_as=to_apply)
        att_context = layer.addto(input=[expanded, to_apply_proj],
                                  act=Act.Tanh(),
                                  bias_attr=False)

        att_weights = layer.fc(input=att_context,
                               param_attr=Attr.Param(direct + '.w'),
                               bias_attr=Attr.Param(direct + '.b',
                                                    initial_std=0.),
                               act=Act.SequenceSoftmax(),
                               size=1)
        scaled = layer.scaling(input=to_apply, weight=att_weights)
        applied = layer.pooling(input=scaled,
                                pooling_type=paddle.pooling.Sum())
        return applied
Example #5
    def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
        with layer.mixed(size=cur_token.size,
                         bias_attr=Attr.Param(direct + '.bp',
                             initial_std=0.),
                         act=Act.Linear()) as proj:
            proj += layer.full_matrix_projection(
                    input=cur_token,
                    param_attr=Attr.Param(direct + '.wp'))
            proj += layer.full_matrix_projection(
                    input=prev,
                    param_attr=Attr.Param(direct + '.wr'))

        expanded = layer.expand(input=proj, expand_as=to_apply)
        att_context = layer.addto(input=[expanded, to_apply_proj],
                                  act=Act.Tanh(),
                                  bias_attr=False)

        att_weights = layer.fc(input=att_context,
                               param_attr=Attr.Param(direct + '.w'),
                               bias_attr=Attr.Param(direct + '.b',
                                   initial_std=0.),
                               act=Act.SequenceSoftmax(),
                               size=1)
        scaled = layer.scaling(input=to_apply, weight=att_weights)
        applied = layer.pooling(input=scaled,
                                pooling_type=paddle.pooling.Sum())
        return applied
Example #6
    def get_loss(self, start_prob, end_prob, start_label, end_label):
        """
        Compute the loss: $l_{\theta} = -\log P(start) \cdot P(end|start)$

        Returns:
            A LayerOutput object containing loss.
        """
        probs = layer.seq_concat(a=start_prob, b=end_prob)
        labels = layer.seq_concat(a=start_label, b=end_label)

        log_probs = layer.mixed(
                    size=probs.size,
                    act=Act.Log(),
                    bias_attr=False,
                    input=paddle.layer.identity_projection(probs))

        neg_log_probs = layer.slope_intercept(
                        input=log_probs,
                        slope=-1,
                        intercept=0)

        loss = paddle.layer.mixed(
               size=1,
               input=paddle.layer.dotmul_operator(a=neg_log_probs, b=labels))

        sum_val = paddle.layer.pooling(input=loss,
                                       pooling_type=paddle.pooling.Sum())
        cost = paddle.layer.sum_cost(input=sum_val)
        return cost
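Why this computes the docstring's loss: `probs` is the concatenation of the start and end probability sequences and `labels` the concatenation of the corresponding labels, so, assuming the labels are one-hot position indicators (not shown in this snippet), the `dotmul` of the labels with the negated log-probabilities, summed by the pooling and `sum_cost` layers, is $\sum_j y_j \cdot (-\log p_j) = -\log P(start) - \log P(end|start) = -\log\big(P(start) \cdot P(end|start)\big) = l_{\theta}$.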
Example #7
 def _u_step(self, h_cur, u):
     s = self._step_basic(h_cur, u)
     with layer.mixed(size=1, bias_attr=False,
                      act=Act.SequenceSoftmax()) as h_weights:
         h_weights += layer.identity_projection(s)
     applied_weights = layer.scaling(input=u, weight=h_weights)
     u_ctx = layer.pooling(input=applied_weights,
                           pooling_type=paddle.pooling.Sum())
     return u_ctx
Example #8
def inner_product_cost(input,
                       label,
                       weight,
                       height,
                       width,
                       num_channel,
                       interp='nearest',
                       is_angle=False):
    """If is_angle, we can not back propagate through the angle, only back
       through the inner product, the loss is not consistent with the evaluation.
    """
    # make sure all the input label and weight have the same size
    if height > 1 and width > 1:
        input = pd.bilinear_interp(input=input,
                                   out_size_x=width,
                                   out_size_y=height)
        label = pd.bilinear_interp(input=label,
                                   out_size_x=width,
                                   out_size_y=height)
        if weight:
            weight = image_resize_func[interp](input=weight,
                                               out_size_x=width,
                                               out_size_y=height)

    size = height * width * num_channel

    input = util_layers.norm(input,
                             height,
                             width,
                             num_channel,
                             trans_back=False)
    label = util_layers.norm(label,
                             height,
                             width,
                             num_channel,
                             trans_back=False)

    inner = pd.mixed(size=size,
                     input=[pd.dotmul_operator(a=input, b=label, scale=1.0)])
    inner = pd.resize(input=pd.sum_cost(input=inner),
                      size=height * width,
                      height=height,
                      width=width)
    if is_angle:
        inner = util_layers.math_op(input=inner, act=pd.activation.Acos())
    else:
        inner = pd.slope_intercept(input=inner, slope=-1, intercept=1.0)

    if weight:
        inner_error = sum_weighted_loss(inner, weight, size=height * width)
    else:
        fac = 1.0 / float(height * width)
        inner = pd.slope_intercept(input=inner, slope=fac, intercept=0.0)
        inner_error = pd.sum_cost(input=inner)

    return inner_error
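For reference on the arithmetic: since both `input` and `label` are normalized by `util_layers.norm` before the product, their inner product is the cosine similarity $\cos\theta$ of the two vectors. The `slope_intercept` with slope $-1$ and intercept $1$ then turns that into $1 - \cos\theta$, while the `Acos()` branch turns it into the angle $\theta$ itself when `is_angle` is set, consistent with the docstring's note that back-propagation goes through the inner product rather than the angle.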
Example #9
 def _step_basic(self, h_cur, u):
     expanded_h = layer.expand(input=h_cur, expand_as=u)
     hu = layer.concat(input=[expanded_h, u])
     with layer.mixed(bias_attr=False) as dot_hu:
         dot_hu += layer.dotmul_operator(a=expanded_h, b=u)
     cat_all = layer.concat(input=[hu, dot_hu])
     s = layer.fc(size=1,
                  bias_attr=False,
                  param_attr=Attr.Param(self.name + '.ws'),
                  input=cat_all)
     return s
Example #10
def sum_weighted_loss(loss, weight, size=1):
    """Loss has input batch_size x image_size, weight has input batch_size x weight
        ( i * w ) / sum(W)
       The output is normalized weighted loss
    """
    weighted_loss = pd.mixed(
        size=size, input=[pd.dotmul_operator(a=loss, b=weight, scale=1.0)])
    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac, act=pd.activation.Inv())
    # scale the elementwise product (loss .* weight), not the raw loss, by 1 / sum(weight)
    weighted_loss = pd.scaling(input=weighted_loss, weight=weight_fac)
    weighted_loss = pd.sum_cost(input=weighted_loss)

    return weighted_loss
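A minimal usage sketch for `sum_weighted_loss`. The imports follow the test examples above; the data layers, their names, and the 32 x 32 size are hypothetical, and `sum_weighted_loss` is assumed to be importable from its module:

import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

h, w = 32, 32
# hypothetical per-pixel loss map and 0/1 mask, both flattened to h * w vectors
pixel_loss = layer.data(name='pixel_loss', type=data_type.dense_vector(h * w))
mask = layer.data(name='mask', type=data_type.dense_vector(h * w))

# normalized weighted loss: sum(pixel_loss .* mask) / sum(mask)
cost = sum_weighted_loss(pixel_loss, mask, size=h * w)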
Example #11
def ele_norm_cost(input,
                  label,
                  weight,
                  height=None,
                  width=None,
                  num_channel=None,
                  cost_type='l1'):
    if height > 1 and width > 1:
        input = pd.bilinear_interp(input=input,
                                   out_size_x=width,
                                   out_size_y=height)
        label = pd.bilinear_interp(input=label,
                                   out_size_x=width,
                                   out_size_y=height)
        if weight:
            weight = pd.nearest_interp(input=weight,
                                       out_size_x=width,
                                       out_size_y=height)

    size = height * width * num_channel
    if weight:
        input = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=input, b=weight, scale=1.0)])
        label = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=label, b=weight, scale=1.0)])
        cost = cost_func[cost_type](input=input, label=label)
        fac = pd.sum_cost(input=weight)
        fac = util_layers.math_op(input=fac, act=pd.activation.Inv())
        cost = pd.scaling(input=cost, weight=fac)
        cost = pd.sum_cost(input=cost)
    else:
        cost = cost_func[cost_type](input=input, label=label)
        fac = 1.0 / float(height * width)
        cost = pd.slope_intercept(input=cost, slope=fac, intercept=0.0)
        cost = pd.sum_cost(input=cost)

    return cost
Example #12
def math_op(input, act=pd.activation.Linear(), op='dot', size=0):
    if not isinstance(input, list):
        input = [input]

    if len(input) == 1:
        # unary operation
        result = pd.mixed(
                input=[pd.identity_projection(input=input[0])], act=act)

    elif len(input) == 2:
        # binary operation
        if op == 'dot':
            result = pd.mixed(size=size,
                              input=pd.dotmul_operator(
                                        a=input[0],
                                        b=input[1],
                                        scale=1.0),
                              act=act)
    else:
        raise ValueError('math op with more than two inputs is not supported')

    return result
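A quick sketch of both branches of `math_op`, mirroring how it is called elsewhere in these examples. The data layers and their size are hypothetical; the `layer`, `data_type` and `activation` aliases are the `paddle.v2` modules used by the test examples, which is an assumption about what `pd` resolves to in the source project:

import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

x = layer.data(name='x', type=data_type.dense_vector(64))
y = layer.data(name='y', type=data_type.dense_vector(64))

# unary branch: identity projection of x passed through an Abs activation
abs_x = math_op(x, act=activation.Abs())

# binary branch with op='dot': elementwise x .* y via a dotmul operator inside mixed
xy = math_op([x, y], op='dot', size=64)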
Example #13
def iou_score(input, label, weight, height, width, class_num, is_cost=True):
    """ class num is semantic classes plus background,
        this score can also serve as iou cost for training
    """
    # input = pd.resize(input=input, size=height * width)
    # label = pd.resize(input=label, size=height * width)

    weight = pd.nearest_interp(input=weight,
                               out_size_x=width,
                               out_size_y=height)
    if not is_cost:
        # If this is not a cost (i.e. evaluation), mask the prediction with the
        # weight and one-hot encode it; otherwise use the input scores directly.
        input = util_layers.math_op(input=[input, weight], op='dot')
        input_one_hot = util_layers.ele_one_hot(input, class_num, height,
                                                width)
    else:
        input_one_hot = input
        input_one_hot = pd.bilinear_interp(input=input_one_hot,
                                           out_size_x=width,
                                           out_size_y=height)

    label = pd.nearest_interp(input=label, out_size_x=width, out_size_y=height)
    label = util_layers.math_op(input=[label, weight], op='dot')

    label_one_hot = util_layers.ele_one_hot(label, class_num, height, width)
    inter = util_layers.math_op(input=[input_one_hot, label_one_hot], op='dot')
    union = pd.addto(input=[input_one_hot, label_one_hot],
                     act=pd.activation.Linear(),
                     bias_attr=False)
    inter_neg = pd.slope_intercept(input=inter, slope=-1)

    union = pd.addto(input=[union, inter_neg],
                     act=pd.activation.Linear(),
                     bias_attr=False)

    inter = pd.resize(input=inter, size=height * width)
    inter = pd.sum_cost(input=inter)
    union = pd.resize(input=union, size=height * width)
    union = pd.sum_cost(input=union)

    union_inv = util_layers.math_op(input=union, act=pd.activation.Inv())
    iou = pd.mixed(size=1,
                   input=[pd.dotmul_operator(a=inter, b=union_inv, scale=1.0)])
    iou = pd.resize(input=iou, size=class_num)

    if is_cost:
        iou = pd.sum_cost(iou)

    return iou
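The arithmetic being assembled here is the usual IoU: with one-hot maps $A$ (prediction) and $B$ (label), summing the elementwise product gives the intersection $|A \cap B|$, and `addto` of $A + B$ followed by subtracting the elementwise intersection (the `slope_intercept` with slope $-1$) gives the union, since $|A \cup B| = |A| + |B| - |A \cap B|$. The final `dotmul` with the inverted union is then $|A \cap B| / |A \cup B|$.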
Example #14
def ns_ele_l2_cost(input,
                   label,
                   weight,
                   height,
                   width,
                   num_channel=None,
                   interp='nearest'):
    assert interp in image_resize_func.keys()
    # make sure the input, label and weight all have the same size
    input = pd.bilinear_interp(input=input,
                               out_size_x=width,
                               out_size_y=height)
    label = image_resize_func[interp](input=label,
                                      out_size_x=width,
                                      out_size_y=height)
    weight = image_resize_func[interp](input=weight,
                                       out_size_x=width,
                                       out_size_y=height)

    # reshape the original layer
    # input has shape c x h x w; change to h x w x c
    input_ts = pd.transpose(input=input,
                            trans_order=[1, 2, 0],
                            height=height,
                            width=width)
    input_rs = pd.resize(input=input_ts, size=num_channel, height=1, width=1)

    label_ts = pd.transpose(input=label,
                            trans_order=[1, 2, 0],
                            height=height,
                            width=width)
    label_rs = pd.resize(input=label_ts, size=num_channel, height=1, width=1)
    weight_rs = pd.resize(input=weight, size=1, height=1, width=1)

    cost_rs = pd.mse_cost(input=input_rs, label=label_rs)
    sqrt_l2_cost = util_layers.math_op(input=cost_rs, act=pd.activation.Sqrt())
    sqrt_l2_cost = pd.mixed(
        size=1,
        input=[pd.dotmul_operator(a=sqrt_l2_cost, b=weight_rs, scale=1.0)])
    sqrt_l2_cost = pd.resize(input=sqrt_l2_cost,
                             size=height * width,
                             height=height,
                             width=width)

    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac, act=pd.activation.Inv())
    sqrt_l2_cost = pd.scaling(input=sqrt_l2_cost, weight=weight_fac)
    cost = pd.sum_cost(input=sqrt_l2_cost)

    return cost
Example #15
 def _attention_flow(self, h, u):
     bs = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                                step=self._h_step,
                                reverse=False)
     b_weights = layer.mixed(act=Act.SequenceSoftmax(),
                             bias_attr=False,
                             input=layer.identity_projection(bs))
     h_step_scaled = layer.scaling(input=h, weight=b_weights)
     h_step = layer.pooling(input=h_step_scaled,
                            pooling_type=paddle.pooling.Sum())
     h_expr = layer.expand(input=h_step, expand_as=h)
     u_expr = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                                    step=self._u_step,
                                    reverse=False)
     g = self._beta(h, u_expr, h_expr)
     return g
Example #16
    def drop_out(self, input, drop_rate=0.5):
        """
        Implements drop out.

        Args:
            input: the LayerOutput needs to apply drop out.
            drop_rate: drop out rate.

        Returns:
            The layer output after applying drop out.
        """
        with layer.mixed(
                layer_attr=Attr.ExtraLayerAttribute(drop_rate=drop_rate),
                bias_attr=False) as dropped:
            dropped += layer.identity_projection(input)
        return dropped
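A minimal usage sketch for `drop_out`, assuming an instance `model` of the class that defines it and the same `layer`/`data_type`/`activation` aliases as the test examples above; the input layer and sizes are hypothetical:

import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

feat = layer.data(name='feat', type=data_type.dense_vector(128))
enc = layer.fc(input=feat, size=100, act=activation.Tanh())

# randomly zero 30% of the activations during training
enc = model.drop_out(enc, drop_rate=0.3)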
Example #17
    def fusion_layer(self, input1, input2):
        """
        Combine input1 and input2 by concat(input1 .* input2, input1 - input2,
        input1, input2)
        """
        # fusion layer
        neg_input2 = layer.slope_intercept(input=input2,
                slope=-1.0,
                intercept=0.0)
        diff1 = layer.addto(input=[input1, neg_input2],
                act=Act.Identity(),
                bias_attr=False)
        diff2 = layer.mixed(bias_attr=False,
                input=layer.dotmul_operator(a=input1, b=input2))

        fused = layer.concat(input=[input1, input2, diff1, diff2])
        return fused
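A minimal usage sketch for `fusion_layer`, again assuming an instance `model` of the defining class; the two encodings are hypothetical and must have the same width, and the fused output is four times that width (input1, input2, their difference, and their elementwise product, concatenated):

import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

h_p = layer.data(name='p_enc', type=data_type.dense_vector(128))
h_q = layer.data(name='q_enc', type=data_type.dense_vector(128))

# concat(input1, input2, input1 - input2, input1 .* input2) -> width 4 * 128
fused = model.fusion_layer(h_p, h_q)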
Example #18
    def drop_out(self, input, drop_rate=0.5):
        """
        Implements drop out.

        Args:
            input: the LayerOutput needs to apply drop out.
            drop_rate: drop out rate.

        Returns:
            The layer output after applying drop out.
        """
        with layer.mixed(
                layer_attr=Attr.ExtraLayerAttribute(
                    drop_rate=drop_rate),
                bias_attr=False) as dropped:
            dropped += layer.identity_projection(input)
        return dropped
Example #19
    def fusion_layer(self, input1, input2):
        """
        Combine input1 and input2 by concat(input1 .* input2, input1 - input2,
        input1, input2)
        """
        # fusion layer
        neg_input2 = layer.slope_intercept(input=input2,
                                           slope=-1.0,
                                           intercept=0.0)
        diff1 = layer.addto(input=[input1, neg_input2],
                            act=Act.Identity(),
                            bias_attr=False)
        diff2 = layer.mixed(bias_attr=False,
                            input=layer.dotmul_operator(a=input1, b=input2))

        fused = layer.concat(input=[input1, input2, diff1, diff2])
        return fused
Example #20
def relative_l1(input,
                label,
                weight,
                height,
                width,
                interp='nearest',
                is_inverse=False):
    """Relative l1 loss for depth
    """

    assert interp in image_resize_func.keys()

    # make sure the input, label and weight all have the same size
    if height > 1 and width > 1:
        input = pd.bilinear_interp(input=input,
                                   out_size_x=width,
                                   out_size_y=height)
        label = pd.bilinear_interp(input=label,
                                   out_size_x=width,
                                   out_size_y=height)
        if weight:
            weight = image_resize_func[interp](input=weight,
                                               out_size_x=width,
                                               out_size_y=height)

    label_inv = util_layers.math_op(input=label, act=pd.activation.Inv())
    label_neg = pd.slope_intercept(input=label, slope=-1)
    diff = pd.addto(input=[input, label_neg],
                    act=pd.activation.Abs(),
                    bias_attr=False)

    rel_error = pd.mixed(
        size=1, input=[pd.dotmul_operator(a=diff, b=label_inv, scale=1.0)])

    if weight:
        rel_error = sum_weighted_loss(rel_error, weight, size=height * width)
    else:
        fac = 1.0 / float(height * width)
        rel_error = pd.slope_intercept(input=rel_error, slope=fac, intercept=0.0)
        rel_error = pd.sum_cost(input=rel_error)

    return rel_error
Example #21
    def _step(self, name, h_q_all, q_proj, h_p_cur):
        """
        Match-LSTM step. This function performs operations done in one
        time step.

        Args:
            h_p_cur: Current hidden of paragraph encodings: h_i.
                     This is the `REAL` input of the group, like
                     x_t in normal rnn.
            h_q_all: Question encodings.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        direct = 'left' if 'left' in name else 'right'

        h_r_prev = paddle.layer.memory(name=name + '_out_',
                                       size=h_q_all.size,
                                       boot_layer=None)
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        with layer.mixed(size=h_q_all.size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            match_input += layer.full_matrix_projection(
                           input=z_cur,
                           param_attr=Attr.Param('match_input_%s.w0' % direct))

        step_out = paddle.networks.lstmemory_unit(
                   name=name + '_out_',
                   out_memory=h_r_prev,
                   param_attr=Attr.Param('step_lstm_%s.w' % direct),
                   input_proj_bias_attr=Attr.Param(
                       'step_lstm_mixed_%s.bias' % direct,
                       initial_std=0.),
                   lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                       initial_std=0.),
                   input=match_input,
                   size=h_q_all.size)
        return step_out
Example #22
    def _step(self, name, h_q_all, q_proj, h_p_cur):
        """
        Match-LSTM step. This function performs operations done in one
        time step.

        Args:
            h_p_cur: Current hidden of paragraph encodings: h_i.
                     This is the `REAL` input of the group, like
                     x_t in normal rnn.
            h_q_all: Question encodings.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        direct = 'left' if 'left' in name else 'right'

        h_r_prev = paddle.layer.memory(name=name + '_out_',
                                       size=h_q_all.size,
                                       boot_layer=None)
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        with layer.mixed(size=h_q_all.size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            match_input += layer.full_matrix_projection(
                input=z_cur,
                param_attr=Attr.Param('match_input_%s.w0' % direct))

        step_out = paddle.networks.lstmemory_unit(
            name=name + '_out_',
            out_memory=h_r_prev,
            param_attr=Attr.Param('step_lstm_%s.w' % direct),
            input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                            initial_std=0.),
            lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                      initial_std=0.),
            input=match_input,
            size=h_q_all.size)
        return step_out
Example #23
    def network(self):
        """
        Implements the details of the model.
        """
        self.check_and_create_data()
        self.create_shared_params()
        q_enc = self.get_enc(self.q_ids, type='q')
        a_enc = self.get_enc(self.a_ids, type='q')

        q_proj_left = layer.fc(size=self.emb_dim * 2,
                               bias_attr=False,
                               param_attr=Attr.Param(self.name + '_left.wq'),
                               input=q_enc)
        q_proj_right = layer.fc(size=self.emb_dim * 2,
                                bias_attr=False,
                                param_attr=Attr.Param(self.name + '_right.wq'),
                                input=q_enc)
        left_match = self.recurrent_group(
            self.name + '_left',
            [layer.StaticInput(q_enc),
             layer.StaticInput(q_proj_left), a_enc],
            reverse=False)
        right_match = self.recurrent_group(
            self.name + '_right',
            [layer.StaticInput(q_enc),
             layer.StaticInput(q_proj_right), a_enc],
            reverse=True)
        match_seq = layer.concat(input=[left_match, right_match])
        with layer.mixed(size=match_seq.size,
                         act=Act.Identity(),
                         layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                         bias_attr=False) as dropped:
            dropped += layer.identity_projection(match_seq)
        match_result = layer.pooling(input=dropped,
                                     pooling_type=paddle.pooling.Max())
        cls = layer.fc(input=match_result,
                       act=Act.Softmax(),
                       size=self.label_dim)
        return cls
Example #24
    def network(self):
        """
        Implements the details of the model.
        """
        self.check_and_create_data()
        self.create_shared_params()
        q_enc = self.get_enc(self.q_ids, type='q')
        a_enc = self.get_enc(self.a_ids, type='q')

        q_proj_left = layer.fc(size=self.emb_dim * 2,
                bias_attr=False,
                param_attr=Attr.Param(self.name + '_left.wq'),
                input=q_enc)
        q_proj_right = layer.fc(size=self.emb_dim * 2,
                bias_attr=False,
                param_attr=Attr.Param(self.name + '_right.wq'),
                input=q_enc)
        left_match = self.recurrent_group(self.name + '_left',
                [layer.StaticInput(q_enc),
                    layer.StaticInput(q_proj_left), a_enc],
                reverse=False)
        right_match = self.recurrent_group(self.name + '_right',
                [layer.StaticInput(q_enc),
                    layer.StaticInput(q_proj_right), a_enc],
                reverse=True)
        match_seq = layer.concat(input=[left_match, right_match])
        with layer.mixed(size=match_seq.size,
                act=Act.Identity(),
                layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                bias_attr=False) as dropped:
            dropped += layer.identity_projection(match_seq)
        match_result = layer.pooling(input=dropped,
                pooling_type=paddle.pooling.Max())
        cls = layer.fc(input=match_result,
                act=Act.Softmax(),
                size=self.label_dim)
        return cls
Example #25
def norm(input, height, width, channel, type='l2', trans_back=True):
    """Channel wise normalize each layer
    """
    size = height * width * channel
    if height > 1 or width > 1:
        input = pd.transpose(input=input,
                             trans_order=[1, 2, 0],
                             height=height,
                             width=width)
        input = pd.resize(input=input, size=channel)

    if type == 'l2':
        norm = pd.mixed(size=size,
                        input=[pd.dotmul_operator(a=input,
                                                  b=input,
                                                  scale=1.0)])
        norm = pd.sum_cost(input=norm)
        norm = math_op(norm, pd.activation.Sqrt())

    if type == 'l1':
        norm = math_op(input, pd.activation.Abs())
        norm = pd.sum_cost(input=norm)

    norm_inv = math_op(norm, pd.activation.Inv())
    norm_inv = pd.repeat(input=norm_inv, num_repeats=3)
    input = math_op(input=[input, norm_inv],
                    act=None, op='dot', size=size)

    if trans_back:
        input = pd.resize(input=input, size=size)
        input = pd.transpose(input=input,
                             trans_order=[2, 0, 1],
                             height=width,
                             width=channel,
                             channels=height)
    return input
Example #26
    def _step(self, name, h_q_all, q_proj, h_p_cur, qe_comm, ee_comm):
        """
        Match-LSTM step. This function performs operations done in one
        time step.

        Args:
            h_p_cur: Current hidden of paragraph encodings: h_i.
                     This is the `REAL` input of the group, like
                     x_t in normal rnn.
            h_q_all: Question encodings.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        conf = mLSTM_crf_config.TrainingConfig()
        direct = 'left' if 'left' in name else 'right'

        # fetch the output of the previous time step
        h_r_prev = paddle.layer.memory(name=name + '_out_',
                                       size=h_q_all.size,
                                       boot_layer=None)
        # h_p_cur :: Current hidden of paragraph encodings
        # h_q_all :: q wordEmbedding
        # q_proj  :: q_proj_(left or right)
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        # feature embeddings
        comm_initial_std = 1 / math.sqrt(64.0)
        qe_comm_emb = paddle.layer.embedding(input=qe_comm,
                                             size=conf.com_vec_dim,
                                             param_attr=paddle.attr.ParamAttr(
                                                 name="_cw_embedding.w0",
                                                 initial_std=comm_initial_std,
                                                 l2_rate=conf.default_l2_rate))

        ee_comm_emb = paddle.layer.embedding(input=ee_comm,
                                             size=conf.com_vec_dim,
                                             param_attr=paddle.attr.ParamAttr(
                                                 name="_eecom_embedding.w0",
                                                 initial_std=comm_initial_std,
                                                 l2_rate=conf.default_l2_rate))

        # layer.mixed :: project the combined inputs to the required width, preparing the LSTM input
        with layer.mixed(size=h_q_all.size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            match_input += layer.full_matrix_projection(
                input=z_cur,
                param_attr=Attr.Param('match_input_z_%s.w0' % direct))
            match_input += layer.full_matrix_projection(
                input=qe_comm_emb,
                param_attr=Attr.Param('match_input_qe_%s.w0' % direct))
            match_input += layer.full_matrix_projection(
                input=ee_comm_emb,
                param_attr=Attr.Param('match_input_ee_%s.w0' % direct))

        step_out = paddle.networks.lstmemory_unit(
            name=name + '_out_',
            out_memory=h_r_prev,
            param_attr=Attr.Param('step_lstm_%s.w' % direct),
            input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                            initial_std=0.),
            lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                      initial_std=0.),
            input=match_input,
            size=h_q_all.size)
        return step_out
Example #27
    def test_projection(self):
        input = layer.data(name='data2', type=data_type.dense_vector(784))
        word = layer.data(
            name='word2', type=data_type.integer_value_sequence(10000))
        fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
        mixed0 = layer.mixed(
            size=256,
            input=[
                layer.full_matrix_projection(input=fc0),
                layer.full_matrix_projection(input=fc1)
            ])
        with layer.mixed(size=200) as mixed1:
            mixed1 += layer.full_matrix_projection(input=fc0)
            mixed1 += layer.identity_projection(input=fc1)

        table = layer.table_projection(input=word)
        emb0 = layer.mixed(size=512, input=table)
        with layer.mixed(size=512) as emb1:
            emb1 += table

        scale = layer.scaling_projection(input=fc0)
        scale0 = layer.mixed(size=100, input=scale)
        with layer.mixed(size=100) as scale1:
            scale1 += scale

        dotmul = layer.dotmul_projection(input=fc0)
        dotmul0 = layer.mixed(size=100, input=dotmul)
        with layer.mixed(size=100) as dotmul1:
            dotmul1 += dotmul

        context = layer.context_projection(input=fc0, context_len=5)
        context0 = layer.mixed(size=500, input=context)
        with layer.mixed(size=500) as context1:
            context1 += context

        conv = layer.conv_projection(
            input=input,
            filter_size=1,
            num_channels=1,
            num_filters=128,
            stride=1,
            padding=0)
        conv0 = layer.mixed(input=conv, bias_attr=True)
        with layer.mixed(bias_attr=True) as conv1:
            conv1 += conv

        print layer.parse_network(mixed0)
        print layer.parse_network(mixed1)
        print layer.parse_network(emb0)
        print layer.parse_network(emb1)
        print layer.parse_network(scale0)
        print layer.parse_network(scale1)
        print layer.parse_network(dotmul0)
        print layer.parse_network(dotmul1)
        print layer.parse_network(conv0)
        print layer.parse_network(conv1)
Example #28
    def test_projection(self):
        input = layer.data(name='data', type=data_type.dense_vector(784))
        word = layer.data(name='word',
                          type=data_type.integer_value_sequence(10000))
        fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
        mixed0 = layer.mixed(size=256,
                             input=[
                                 layer.full_matrix_projection(input=fc0),
                                 layer.full_matrix_projection(input=fc1)
                             ])
        with layer.mixed(size=200) as mixed1:
            mixed1 += layer.full_matrix_projection(input=fc0)
            mixed1 += layer.identity_projection(input=fc1)

        table = layer.table_projection(input=word)
        emb0 = layer.mixed(size=512, input=table)
        with layer.mixed(size=512) as emb1:
            emb1 += table

        scale = layer.scaling_projection(input=fc0)
        scale0 = layer.mixed(size=100, input=scale)
        with layer.mixed(size=100) as scale1:
            scale1 += scale

        dotmul = layer.dotmul_projection(input=fc0)
        dotmul0 = layer.mixed(size=100, input=dotmul)
        with layer.mixed(size=100) as dotmul1:
            dotmul1 += dotmul

        context = layer.context_projection(input=fc0, context_len=5)
        context0 = layer.mixed(size=100, input=context)
        with layer.mixed(size=100) as context1:
            context1 += context

        conv = layer.conv_projection(input=input,
                                     filter_size=1,
                                     num_channels=1,
                                     num_filters=128,
                                     stride=1,
                                     padding=0)
        conv0 = layer.mixed(input=conv, bias_attr=True)
        with layer.mixed(bias_attr=True) as conv1:
            conv1 += conv

        print layer.parse_network(mixed0)
        print layer.parse_network(mixed1)
        print layer.parse_network(emb0)
        print layer.parse_network(emb1)
        print layer.parse_network(scale0)
        print layer.parse_network(scale1)
        print layer.parse_network(dotmul0)
        print layer.parse_network(dotmul1)
        print layer.parse_network(conv0)
        print layer.parse_network(conv1)