Example 1
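An adaptive embedding forward pass: with div_val == 1 a single embedding table is used and, if needed, projected with F.linear; otherwise the flattened input is split by vocabulary cutoffs, each slice is embedded, projected with F.linear, and scattered back before reshaping.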
    def forward(self, inp):
        if self.div_val == 1:
            embed = self.emb_layers[0](inp)
            if self.d_proj != self.d_embed:
                embed = F.linear(embed, self.emb_projs[0])
        else:
            inp_flat = paddle.reshape(inp, shape=[-1])
            emb_flat = paddle.zeros(
                [inp_flat.shape[0], self.d_proj], dtype=global_dtype)
            for i in range(len(self.cutoffs)):
                l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]

                mask_i = (inp_flat >= l_idx) & (inp_flat < r_idx)
                indices_i = paddle.nonzero(mask_i).squeeze([1])

                if indices_i.numel() == 0:
                    continue

                inp_i = paddle.gather(inp_flat, indices_i, axis=0) - l_idx
                emb_i = self.emb_layers[i](inp_i)
                emb_i = F.linear(emb_i, self.emb_projs[i])

                emb_flat = paddle.scatter(emb_flat, indices_i, emb_i)

            # build the target shape by list concatenation (list.append returns None)
            embed = paddle.reshape(
                emb_flat, shape=inp.shape + [self.d_proj])

        embed = embed * self.emb_scale

        return embed
Example 2
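Logit computation for an adaptive-softmax head: F.linear against the transposed weight, with an optional projection applied to the hidden states first.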
    def _compute_logits(self, hidden, weight, bias, proj=None):
        if proj is None:
            logit = F.linear(hidden, weight.t(), bias=bias)
        else:
            proj_hid = F.linear(hidden, proj)
            logit = F.linear(proj_hid, weight.t(), bias=bias)

        return logit
Example 3
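A relevance-propagation helper: the linear responses of two weight/input pairs are recomputed, and the relevance R from the enclosing scope is redistributed to each input via gradprop.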
        def f(w1, w2, x1, x2):
            Z1 = F.linear(x1, w1)
            Z2 = F.linear(x2, w2)
            S1 = safe_divide(R, Z1)
            S2 = safe_divide(R, Z2)
            C1 = x1 * self.gradprop(Z1, x1, S1)[0]
            C2 = x2 * self.gradprop(Z2, x2, S2)[0]

            return C1 + C2
Example 4
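An equalized linear layer: the weight is rescaled at call time, and the layer either applies a fused leaky ReLU with a learning-rate-scaled bias or a plain biased F.linear.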
    def forward(self, input):
        if self.activation:
            out = F.linear(input, self.weight * self.scale)
            out = fused_leaky_relu(out, self.bias * self.lr_mul)

        else:
            out = F.linear(input,
                           self.weight * self.scale,
                           bias=self.bias * self.lr_mul)

        return out
Example 5
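Another relevance-propagation helper: four signed linear responses are masked by the non-zero pattern of R before the relevance is redistributed with pos_prop.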
        def f(R, w1, w2, x1, x2):
            R_nonzero = R.not_equal(ZERO_TENSOR).astype(R.dtype)
            Za1 = F.linear(x1, w1) * R_nonzero
            Za2 = -F.linear(x1, w2) * R_nonzero

            Zb1 = -F.linear(x2, w1) * R_nonzero
            Zb2 = F.linear(x2, w2) * R_nonzero

            C1 = pos_prop(R, Za1, Za2, x1)
            C2 = pos_prop(R, Zb1, Zb2, x2)

            return C1 + C2
Example 6
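First-layer relevance propagation over positive/negative input and weight parts; the bias relevance is split between the positive and negative contributions via safe_divide.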
        def first_prop(pd, px, nx, pw, nw):
            Rpp = F.linear(px, pw) * pd
            Rpn = F.linear(px, nw) * pd
            Rnp = F.linear(nx, pw) * pd
            Rnn = F.linear(nx, nw) * pd
            Pos = (Rpp + Rnn).sum(dim=-1, keepdim=True)
            Neg = (Rpn + Rnp).sum(dim=-1, keepdim=True)

            Z1 = F.linear(px, pw)
            Z2 = F.linear(px, nw)
            Z3 = F.linear(nx, pw)
            Z4 = F.linear(nx, nw)

            S1 = safe_divide(Rpp, Z1)
            S2 = safe_divide(Rpn, Z2)
            S3 = safe_divide(Rnp, Z3)
            S4 = safe_divide(Rnn, Z4)
            C1 = px * self.gradprop(Z1, px, S1)[0]
            C2 = px * self.gradprop(Z2, px, S2)[0]
            C3 = nx * self.gradprop(Z3, nx, S3)[0]
            C4 = nx * self.gradprop(Z4, nx, S4)[0]
            bp = self.bias * pd * safe_divide(Pos, Pos + Neg)
            bn = self.bias * pd * safe_divide(Neg, Pos + Neg)
            Sb1 = safe_divide(bp, Z1)
            Sb2 = safe_divide(bn, Z2)
            Cb1 = px * self.gradprop(Z1, px, Sb1)[0]
            Cb2 = px * self.gradprop(Z2, px, Sb2)[0]
            return C1 + C4 + Cb1 + C2 + C3 + Cb2
Example 7
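A test helper that runs F.linear in dynamic-graph mode on the given place and returns the result as a NumPy array.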
    def functional(self, place):
        paddle.disable_static(place)
        input = paddle.to_tensor(self.input)
        weight = paddle.to_tensor(self.weight)
        bias = paddle.to_tensor(self.bias)
        out = F.linear(input, weight, bias)
        return out.numpy()
Example 8
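A noisy linear layer: factorized Gaussian noise is sampled under no_grad and added to the weight and bias before F.linear.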
    def forward(self, inputs):
        with paddle.no_grad():
            eps_in = self._scale_noise(self.epsilon_input.shape)
            eps_out = self._scale_noise(self.epsilon_output.shape)
            noise_v = paddle.multiply(eps_in, eps_out).detach()
        return F.linear(inputs, self.weight + self.sigma_weight * noise_v.t(),
                        self.bias + self.sigma_bias * eps_out.squeeze().t())
Example 9
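A quantization-aware linear forward: the input and weight are optionally preprocessed and then fake-quantized before F.linear.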
    def forward(self, input):
        if self._act_preprocess is not None:
            input = self._act_preprocess(input)
        quant_input = self._fake_quant_input(input)

        weight = self.weight
        if self._weight_preprocess is not None:
            weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)

        out = F.linear(
            x=quant_input, weight=quant_weight, bias=self.bias, name=self.name)
        return out
Example 10
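An ArcFace-style margin head: F.linear on L2-normalized features and weights gives the cosine similarity, and the angular margin is applied only to the target class.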
    def forward(self, input, label):
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = paddle.sqrt(
            paddle.clip(1.0 - paddle.pow(cosine, 2), min=0, max=1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = paddle.where(cosine > 0, phi, cosine)
        else:
            phi = paddle.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = paddle.nn.functional.one_hot(label, self.class_dim)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output
Example 11
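An integral module for box regression: a softmax over the discretized bins followed by F.linear with a fixed projection vector yields the expected distance offsets.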
    def forward(self, x):
        """Forward feature from the regression head to get integral result of
        bounding box location.
        Args:
            x (Tensor): Features of the regression head, shape (N, 4*(n+1)),
                n is self.reg_max.
        Returns:
            x (Tensor): Integral result of box locations, i.e., distance
                offsets from the box center in four directions, shape (N, 4).
        """
        x = F.softmax(x.reshape([-1, self.reg_max + 1]), axis=1)
        x = F.linear(x, self.project).reshape([-1, 4])
        return x
Example 12
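A column-parallel linear layer: an identity collective wraps the input on the model-parallel group, F.linear computes the partial output, and the shards are optionally concatenated across ranks.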
    def forward(self, x):
        # use inner api to process identity
        if self.is_mp:
            input_parallel = paddle.distributed.collective._c_identity(
                x, group=self.model_parallel_group)
        else:
            input_parallel = x

        output_parallel = F.linear(input_parallel,
                                   self.weight,
                                   self.bias,
                                   name=self._name)

        if self.gather_output and self.is_mp:
            output = paddle.distributed.collective._c_concat(
                output_parallel, group=self.model_parallel_group)
        else:
            output = output_parallel
        return output
Example 13
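A dynamically sized (supernet) linear layer: the weight and bias are sliced to the requested input/output widths before F.linear.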
    def forward(self, input, expand_ratio=None, channel=None):
        self.cur_config = {'expand_ratio': expand_ratio, 'channel': channel}
        ### weight: (Cin, Cout)
        in_nc = int(input.shape[-1])
        assert (
            expand_ratio is None or channel is None
        ), "expand_ratio and channel cannot both be set at the same time."
        if expand_ratio is not None:
            out_nc = int(expand_ratio * self.base_output_dim)
        elif channel is not None:
            out_nc = int(channel)
        else:
            out_nc = self._out_features

        weight = self.weight[:in_nc, :out_nc]
        if self._bias_attr != False:
            bias = self.bias[:out_nc]
        else:
            bias = self.bias

        out = F.linear(x=input, weight=weight, bias=bias, name=self.name)
        return out
Example 14
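A row-parallel linear layer: the input is optionally split along the last dimension, F.linear runs without bias, the partial outputs are all-reduced across the model-parallel group, and the bias is added at the end.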
    def forward(self, x):
        if self.input_is_parallel or (not self.is_mp):
            input_parallel = x
        else:
            # split last dim
            input_parallel = paddle.distributed.collective._c_split(
                x, group=self.model_parallel_group)

        output_parallel = F.linear(input_parallel,
                                   self.weight,
                                   name=self._name)

        if self.is_mp:
            output_ = paddle.distributed.collective._mp_allreduce(
                output_parallel,
                group=self.model_parallel_group,
                use_calc_stream=True,
                use_model_parallel=True)
        else:
            output_ = output_parallel

        output = output_ + self.bias if self.bias is not None else output_
        return output
Example 15
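Computes logits as the linear product of the gathered features with the transposed normalized weight matrix.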
    def forward(self, total_features, norm_weight):
        logits = linear(total_features, paddle.t(norm_weight))
        return logits
Example 16
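A MAML-style functional forward pass: four conv/BN/ReLU/max-pool blocks are built from an explicit parameter list, followed by a flatten and a final F.linear classifier.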
    def forward(self, x, params=None, bn_training=True):
        """
        :param x: 输入图片
        :param params:
        :param bn_training: set False to not update
        :return: 输出分类
        """
        if params is None:
            params = self.vars

        weight, bias = params[0], params[1]  # 1st conv layer
        x = F.conv2d(x, weight, bias, stride=1, padding=1)
        weight, bias = params[2], params[3]  # 1st BN layer
        running_mean, running_var = self.vars_bn[0], self.vars_bn[1]
        x = F.batch_norm(x,
                         running_mean,
                         running_var,
                         weight=weight,
                         bias=bias,
                         training=bn_training)
        x = F.relu(x)  # 1st ReLU
        x = F.max_pool2d(x, kernel_size=2)  # 1st max-pool layer

        weight, bias = params[4], params[5]  # 2nd conv layer
        x = F.conv2d(x, weight, bias, stride=1, padding=1)
        weight, bias = params[6], params[7]  # 2nd BN layer
        running_mean, running_var = self.vars_bn[2], self.vars_bn[3]
        x = F.batch_norm(x,
                         running_mean,
                         running_var,
                         weight=weight,
                         bias=bias,
                         training=bn_training)
        x = F.relu(x)  # 2nd ReLU
        x = F.max_pool2d(x, kernel_size=2)  # 2nd max-pool layer

        weight, bias = params[8], params[9]  # 3rd conv layer
        x = F.conv2d(x, weight, bias, stride=1, padding=1)
        weight, bias = params[10], params[11]  # 3rd BN layer
        running_mean, running_var = self.vars_bn[4], self.vars_bn[5]
        x = F.batch_norm(x,
                         running_mean,
                         running_var,
                         weight=weight,
                         bias=bias,
                         training=bn_training)
        x = F.relu(x)  # 3rd ReLU
        x = F.max_pool2d(x, kernel_size=2)  # 3rd max-pool layer

        weight, bias = params[12], params[13]  # 4th conv layer
        x = F.conv2d(x, weight, bias, stride=1, padding=1)
        weight, bias = params[14], params[15]  # 4th BN layer
        running_mean, running_var = self.vars_bn[6], self.vars_bn[7]
        x = F.batch_norm(x,
                         running_mean,
                         running_var,
                         weight=weight,
                         bias=bias,
                         training=bn_training)
        x = F.relu(x)  # 4th ReLU
        x = F.max_pool2d(x, kernel_size=2)  # 4th max-pool layer

        x = paddle.reshape(x, [x.shape[0], -1])  ## flatten
        weight, bias = params[-2], params[-1]  # linear
        x = F.linear(x, weight, bias)

        output = x

        return output
Example 17
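A static-graph test comparing fused_feedforward against a reference feed-forward block built from F.linear, dropout, and layer_norm.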
    def test_static(self):
        paddle.enable_static()
        default_main_program().random_seed = 42
        dtype = "float32"
        layer_norm_dtype = "float32"
        batch_size = 1
        d_model = 8
        dim_feedforward = 8

        x = paddle.static.data(name='x',
                               shape=[batch_size, d_model, dim_feedforward],
                               dtype=dtype)
        linear1_weight = paddle.static.data(name='linear1_weight',
                                            shape=[d_model, dim_feedforward],
                                            dtype=dtype)
        linear1_bias = paddle.static.data(name='linear1_bias',
                                          shape=[dim_feedforward])
        linear2_weight = paddle.static.data(name='linear2_weight',
                                            shape=[dim_feedforward, d_model],
                                            dtype=dtype)
        linear2_bias = paddle.static.data(name='linear2_bias', shape=[d_model])
        ln1_scale = paddle.static.data(name='ln1_scale', shape=[d_model])
        ln1_bias = paddle.static.data(name='ln1_bias', shape=[d_model])
        ln2_scale = paddle.static.data(name='ln2_scale', shape=[d_model])
        ln2_bias = paddle.static.data(name='ln2_bias', shape=[d_model])

        fused_out = incubate_f.fused_feedforward(x,
                                                 linear1_weight,
                                                 linear2_weight,
                                                 linear1_bias,
                                                 linear2_bias,
                                                 ln1_scale,
                                                 ln1_bias,
                                                 ln2_scale,
                                                 ln2_bias,
                                                 0.0,
                                                 0.0,
                                                 activation="relu",
                                                 pre_layer_norm=False)

        ######base ffn######
        linear1_out = F.linear(x, linear1_weight, linear1_bias)
        act_out = F.relu(linear1_out)
        dropout1_out = F.dropout(x=act_out, p=0.0, training=False)
        linear2_out = F.linear(dropout1_out, linear2_weight, linear2_bias)
        dropout2_out = x + F.dropout(x=linear2_out, p=0.0, training=False)
        ln_out = F.layer_norm(dropout2_out,
                              normalized_shape=list([d_model]),
                              weight=ln2_scale,
                              bias=ln2_bias)
        ######base ffn######

        exe = paddle.static.Executor(paddle.CUDAPlace(0))

        x_data = np.random.random(
            (batch_size, d_model, dim_feedforward)).astype(dtype)
        linear1_weight_data = np.random.random(
            (d_model, dim_feedforward)).astype(dtype)
        linear1_bias_data = np.zeros((dim_feedforward)).astype(dtype)
        linear2_weight_data = np.random.random(
            (dim_feedforward, d_model)).astype(dtype)
        linear2_bias_data = np.zeros((d_model)).astype(dtype)

        ln1_scale_data = np.ones((d_model)).astype(layer_norm_dtype)
        ln1_bias_data = np.zeros((d_model)).astype(layer_norm_dtype)
        ln2_scale_data = np.ones((d_model)).astype(layer_norm_dtype)
        ln2_bias_data = np.zeros((d_model)).astype(layer_norm_dtype)

        res_list = [fused_out, ln_out]
        real_res = []

        for res in res_list:
            fetch = exe.run(feed={
                'x': x_data,
                'linear1_weight': linear1_weight_data,
                'linear1_bias': linear1_bias_data,
                'linear2_weight': linear2_weight_data,
                'linear2_bias': linear2_bias_data,
                'ln1_scale': ln1_scale_data,
                'ln1_bias': ln1_bias_data,
                'ln2_scale': ln2_scale_data,
                'ln2_bias': ln2_bias_data
            },
                            fetch_list=[res])
            real_res.append(fetch)
        self.assertTrue(np.allclose(real_res[0], real_res[1], atol=1e-3),
                        "fused_feedforward output does not match the reference FFN output")