Example 1
    def test_dygraph(self):
        with fluid.dygraph.guard():
            x1 = fluid.dygraph.to_variable(np.array([1, 3]).astype(np.float32))
            y1 = fluid.dygraph.to_variable(np.array([2, 5]).astype(np.float32))
            self.assertTrue(
                np.allclose(paddle.dot(x1, y1).numpy(), np.array([17])))

            x1 = fluid.dygraph.to_variable(
                np.array([[1, 3], [3, 5]]).astype(np.float32))
            y1 = fluid.dygraph.to_variable(
                np.array([[2, 5], [6, 8]]).astype(np.float32))
            self.assertTrue(
                np.array_equal(
                    paddle.dot(x1, y1).numpy(), np.array([[17], [58]])))
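As the assertions show, paddle.dot on 1-D tensors returns the inner product (as a single-element tensor here), while on 2-D tensors it reduces along the last axis and yields one value per row. A minimal standalone sketch of the 1-D case, using the current paddle.to_tensor API:

import numpy as np
import paddle

x = paddle.to_tensor(np.array([1, 3], dtype=np.float32))
y = paddle.to_tensor(np.array([2, 5], dtype=np.float32))
print(paddle.dot(x, y).numpy())  # [17.]  (1*2 + 3*5)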
Example 2
    def compute_weight(self, module, do_power_iteration):
        weight = getattr(module, self.name + '_orig')
        u = getattr(module, self.name + '_u')
        v = getattr(module, self.name + '_v')
        weight_mat = self.reshape_weight_to_matrix(weight)

        if do_power_iteration:
            with paddle.no_grad():
                for _ in range(self.n_power_iterations):
                    v.set_value(
                        F.normalize(
                            paddle.matmul(weight_mat,
                                          u,
                                          transpose_x=True,
                                          transpose_y=False),
                            axis=0,
                            epsilon=self.eps,
                        ))

                    u.set_value(
                        F.normalize(
                            paddle.matmul(weight_mat, v),
                            axis=0,
                            epsilon=self.eps,
                        ))
                if self.n_power_iterations > 0:
                    u = u.clone()
                    v = v.clone()

        sigma = paddle.dot(u, paddle.mv(weight_mat, v))
        weight = weight / sigma
        return weight
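This is the power-iteration step of spectral normalization: u and v converge to the leading left and right singular vectors of weight_mat, so sigma = u^T W v estimates the largest singular value, and dividing the weight by it bounds the layer's spectral norm. A hedged, self-contained sketch of the same estimate (standalone tensors instead of the module attributes and helpers used above):

import paddle
import paddle.nn.functional as F

paddle.seed(0)
W = paddle.randn([4, 3])
u = F.normalize(paddle.randn([4]), axis=0)
v = F.normalize(paddle.randn([3]), axis=0)
for _ in range(50):  # power iteration
    v = F.normalize(paddle.matmul(W, u, transpose_x=True), axis=0)
    u = F.normalize(paddle.matmul(W, v), axis=0)
sigma = paddle.dot(u, paddle.mv(W, v))  # estimated largest singular value
print(float(sigma), float(paddle.linalg.svd(W)[1][0]))  # the two should agree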
Example 3
    def forward(self,
                query_input_ids,
                pos_title_input_ids,
                neg_title_input_ids,
                is_prediction=False,
                query_token_type_ids=None,
                query_position_ids=None,
                query_attention_mask=None,
                pos_title_token_type_ids=None,
                pos_title_position_ids=None,
                pos_title_attention_mask=None,
                neg_title_token_type_ids=None,
                neg_title_position_ids=None,
                neg_title_attention_mask=None):
        query_cls_embedding = self.get_pooled_embedding(
            query_input_ids, query_token_type_ids, query_position_ids,
            query_attention_mask)

        pos_title_cls_embedding = self.get_pooled_embedding(
            pos_title_input_ids, pos_title_token_type_ids,
            pos_title_position_ids, pos_title_attention_mask)

        neg_title_cls_embedding = self.get_pooled_embedding(
            neg_title_input_ids, neg_title_token_type_ids,
            neg_title_position_ids, neg_title_attention_mask)

        all_title_cls_embedding = paddle.concat(
            x=[pos_title_cls_embedding, neg_title_cls_embedding], axis=0)

        if is_prediction:
            logits = paddle.dot(query_cls_embedding, pos_title_cls_embedding)
            outputs = {
                "probs": logits,
                "q_rep": query_cls_embedding,
                "p_rep": pos_title_cls_embedding
            }
            return outputs

        if self.use_cross_batch:
            tensor_list = []
            paddle.distributed.all_gather(tensor_list, all_title_cls_embedding)
            all_title_cls_embedding = paddle.concat(x=tensor_list, axis=0)

        # similarity logits: each query scored against all (gathered) titles,
        # shape [batch_size, num_titles]
        logits = paddle.matmul(query_cls_embedding,
                               all_title_cls_embedding,
                               transpose_y=True)

        batch_size = query_cls_embedding.shape[0]

        labels = paddle.arange(batch_size * self.rank * 2,
                               batch_size * (self.rank * 2 + 1),
                               dtype='int64')
        labels = paddle.reshape(labels, shape=[-1, 1])

        accuracy = paddle.metric.accuracy(input=logits, label=labels)
        loss = F.cross_entropy(input=logits, label=labels)
        outputs = {"loss": loss, "accuracy": accuracy}

        return outputs
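The labels encode where each query's positive title lands after the two concatenations: every rank contributes batch_size positives followed by batch_size negatives, so rank r's positives occupy rows batch_size * r * 2 through batch_size * (r * 2 + 1) - 1 of the gathered title matrix. A minimal single-process sketch of this in-batch-negative loss (random embeddings and assumed shapes, no cross-batch gathering, i.e. rank = 0):

import paddle
import paddle.nn.functional as F

paddle.seed(0)
batch_size, dim = 4, 8
q = F.normalize(paddle.randn([batch_size, dim]), axis=-1)    # query embeddings
pos = F.normalize(paddle.randn([batch_size, dim]), axis=-1)  # positive titles
neg = F.normalize(paddle.randn([batch_size, dim]), axis=-1)  # negative titles
titles = paddle.concat([pos, neg], axis=0)                   # [2 * batch_size, dim]
logits = paddle.matmul(q, titles, transpose_y=True)          # [batch_size, 2 * batch_size]
# query i's positive sits in column i (the rank = 0 case of the arange above)
labels = paddle.reshape(paddle.arange(batch_size, dtype='int64'), [-1, 1])
loss = F.cross_entropy(input=logits, label=labels)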
Example 4
 def phi_and_derphi(a):
     r"""Compute function value and derivative of phi at a.
          phi(a) = f(xk + a * pk)
         phi'(a) = f'(xk + a * pk) * pk
     """
     phi_value, f_grad = _value_and_gradient(f, xk + a * pk)
     phi_grad = paddle.dot(f_grad, pk)
      # f_grad is also returned so that BFGS/L-BFGS can compute yk without recomputing the gradient.
     return phi_value, f_grad, phi_grad
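phi restricts f to the search ray xk + a * pk, so phi'(a) is the directional derivative of f along pk. A hedged, runnable illustration with a toy quadratic (f, xk and pk here are stand-ins for the solver's, and the gradient is taken directly with paddle.grad rather than through the _value_and_gradient helper):

import paddle

def f(x):
    return paddle.dot(x, x)  # toy objective, gradient is 2*x

xk = paddle.to_tensor([1.0, 2.0])
pk = paddle.to_tensor([0.0, 1.0])
a = 0.5

x = (xk + a * pk).detach()
x.stop_gradient = False
phi_value = f(x)
f_grad = paddle.grad(phi_value, x)[0]  # f'(xk + a * pk)
phi_grad = paddle.dot(f_grad, pk)      # phi'(a) = f'(xk + a * pk) . pk
print(float(phi_grad))                 # 2 * (2 + 0.5) = 5.0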
Example 5
    def body(k, done, is_converge, num_func_calls, xk, value, g1, Hk):
        #############    compute pk    #############
        pk = -paddle.matmul(Hk, g1)

        #############    compute alpha by line search    #############
        if line_search_fn == 'strong_wolfe':
            alpha, value, g2, ls_func_calls = strong_wolfe(
                f=objective_func,
                xk=xk,
                pk=pk,
                initial_step_length=initial_step_length,
                dtype=dtype)
        else:
            raise NotImplementedError(
                "Currently only line_search_fn = 'strong_wolfe' is supported, but '{}' was specified"
                .format(line_search_fn))
        num_func_calls += ls_func_calls

        #############    update Hk    #############
        sk = alpha * pk
        yk = g2 - g1

        xk = xk + sk
        g1 = g2

        sk = paddle.unsqueeze(sk, 0)
        yk = paddle.unsqueeze(yk, 0)

        rhok_inv = paddle.dot(yk, sk)
        rhok = paddle.static.nn.cond(
            rhok_inv == 0.,
            lambda: paddle.full(shape=[1], fill_value=1000.0, dtype=dtype),
            lambda: 1. / rhok_inv)

        Vk_transpose = I - rhok * sk * yk.t()
        Vk = I - rhok * yk * sk.t()
        Hk = paddle.matmul(paddle.matmul(Vk_transpose, Hk),
                           Vk) + rhok * sk * sk.t()

        k += 1

        #############    check convergence    #############
        gnorm = paddle.linalg.norm(g1, p=np.inf)
        pk_norm = paddle.linalg.norm(pk, p=np.inf)
        paddle.assign(
            done | (gnorm < tolerance_grad) | (pk_norm < tolerance_change),
            done)
        paddle.assign(done, is_converge)
        # when alpha = 0, xk cannot change, so mark the iteration as done.
        paddle.assign(done | (alpha == 0.), done)
        return [k, done, is_converge, num_func_calls, xk, value, g1, Hk]
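The Hk update is the standard BFGS inverse-Hessian recurrence H_new = (I - rho * s * y^T) H (I - rho * y * s^T) + rho * s * s^T with rho = 1 / (y^T s); the rhok_inv == 0 guard avoids dividing by zero when the curvature condition fails. A small NumPy check (deterministic toy vectors, not solver output) that the update satisfies the secant condition H_new @ y = s:

import numpy as np

n = 3
H = np.eye(n)
s = np.array([[1.0], [2.0], [0.5]])  # step s_k
y = np.array([[0.3], [1.0], [2.0]])  # gradient change y_k
rho = 1.0 / float(y.T @ s)           # 1 / (y^T s)
I = np.eye(n)
H_new = (I - rho * s @ y.T) @ H @ (I - rho * y @ s.T) + rho * s @ s.T
assert np.allclose(H_new @ y, s)     # secant condition: H_{k+1} y_k = s_k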
Example 6
    def test_custom_kernel_dot_load(self):
        # test dot load
        x_data = np.random.uniform(1, 5, [2, 10]).astype(np.int8)
        y_data = np.random.uniform(1, 5, [2, 10]).astype(np.int8)
        result = np.sum(x_data * y_data, axis=1).reshape([2, 1])

        import paddle
        paddle.set_device('cpu')
        x = paddle.to_tensor(x_data)
        y = paddle.to_tensor(y_data)
        out = paddle.dot(x, y)

        self.assertTrue(
            np.array_equal(out.numpy(), result),
            "custom kernel dot out: {},\n numpy dot out: {}".format(
                out.numpy(), result))
Example 7
 def forward(self, x, y):
     return paddle.dot(x, y)
Example 8
 def func(x):
     return paddle.dot(x, x)
Example 9
 def forward(self, x, y):
     """
     forward
     """
     x = paddle.dot(x, y)
     return x
Example 10
def dot(x, y):
    return Tensor(paddle.dot(x, y))
Example 11
def get_score(features1, features2):  # feature mean
    score = float(paddle.dot(features1.squeeze(), features2.squeeze()))
    return score
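If the features are L2-normalized (an assumption; the snippet does not show how features1 and features2 are produced), this dot product is exactly the cosine similarity of the two feature vectors. A minimal sketch:

import paddle
import paddle.nn.functional as F

paddle.seed(0)
features1 = F.normalize(paddle.randn([1, 128]), axis=-1)
features2 = F.normalize(paddle.randn([1, 128]), axis=-1)
score = float(paddle.dot(features1.squeeze(), features2.squeeze()))
print(score)  # cosine similarity, in [-1, 1]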