Example #1
def general_gaussian(M: int,
                     p,
                     sig,
                     sym: bool = True,
                     dtype: str = 'float64') -> Tensor:
    """Compute a window with a generalized Gaussian shape.

    This function is consistent with scipy.signal.windows.general_gaussian().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p))

    return _truncate(w, needs_trunc)
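
A minimal parity check could look like the following sketch; it assumes this general_gaussian (and the private helpers _len_guards, _extend and _truncate it relies on) is importable, and only compares the result against the SciPy reference mentioned in the docstring.

import numpy as np
from scipy.signal.windows import general_gaussian as scipy_general_gaussian

# Sketch: compare the Paddle window with SciPy for the same parameters.
w_paddle = general_gaussian(10, p=1.5, sig=3.0)
w_scipy = scipy_general_gaussian(10, p=1.5, sig=3.0)
np.testing.assert_allclose(w_paddle.numpy(), w_scipy, rtol=1e-6)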
Example #2
    def test_static(self):
        paddle.enable_static()
        init_data = [paddle.ones(shape, dtype='int32') for shape in self.shapes]
        array = paddle.tensor.create_array('float32', init_data)
        for res, gt in zip(array, init_data):
            self.assertEqual(res.shape, gt.shape)

        # test error with a nested list
        with self.assertRaises(TypeError):
            paddle.tensor.create_array('float32',
                                       [init_data[0], [init_data[1]]])

        # test error with a non-Variable input
        with self.assertRaises(TypeError):
            paddle.tensor.create_array('float32', ("str"))

        paddle.disable_static()
Example #3
    def seq2feats(self, log_seqs, time_matrices):
        seqs = self.item_emb(log_seqs)
        seqs *= self.item_emb._embedding_dim**0.5
        seqs = self.item_emb_dropout(seqs)
        positions = paddle.arange(log_seqs.shape[1]).unsqueeze(0).expand(
            [log_seqs.shape[0], -1])
        abs_pos_K = self.abs_pos_K_emb(positions)
        abs_pos_V = self.abs_pos_V_emb(positions)
        abs_pos_K = self.abs_pos_K_emb_dropout(abs_pos_K)
        abs_pos_V = self.abs_pos_V_emb_dropout(abs_pos_V)

        time_matrix_K = self.time_matrix_K_emb(time_matrices)
        time_matrix_V = self.time_matrix_V_emb(time_matrices)
        time_matrix_K = self.time_matrix_K_dropout(time_matrix_K)
        time_matrix_V = self.time_matrix_V_dropout(time_matrix_V)

        # mask the 0th item (a placeholder for dry runs) in log_seqs;
        # it would be easier if the 0th item could be treated as an exception during training
        timeline_mask = log_seqs == 0
        seqs *= (log_seqs != 0).astype(paddle.get_default_dtype()).unsqueeze(
            -1)  # broadcast in last dim

        tl = seqs.shape[1]  # time dimension length, used to enforce causality
        attention_mask = (
            paddle.tril(paddle.ones([tl, tl])) == 0).astype(paddle.bool)

        for i in range(len(self.attention_layers)):
            # Self-attention, Q=layernorm(seqs), K=V=seqs
            # seqs = paddle.transpose(seqs, 0, 1) # (N, T, C) -> (T, N, C)
            Q = self.attention_layernorms[i](seqs)
            mha_outputs = self.attention_layers[i](
                Q, seqs, timeline_mask, attention_mask, time_matrix_K,
                time_matrix_V, abs_pos_K, abs_pos_V)
            seqs = Q + mha_outputs
            # seqs = paddle.transpose(seqs, 0, 1) # (T, N, C) -> (N, T, C)

            # Point-wise Feed-forward, actually 2 Conv1D for channel wise fusion
            seqs = self.forward_layernorms[i](seqs)
            seqs = self.forward_layers[i](seqs)

            seqs *= (timeline_mask.astype(int) == 0
                     ).astype(paddle.get_default_dtype()).unsqueeze(-1)

        log_feats = self.last_layernorm(seqs)

        return log_feats
Example #4
    def test_api(self):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            input = fluid.layers.fill_constant(shape=[1],
                                               dtype='int64',
                                               value=5)
            expected_result = np.array([8], dtype='int64')

            output = paddle.tensor.math.increment(input, value=3)
            exe = fluid.Executor(fluid.CPUPlace())
            result = exe.run(fetch_list=[output])
            self.assertEqual((result == expected_result).all(), True)

        with fluid.dygraph.guard():
            input = paddle.ones(shape=[1], dtype='int64')
            expected_result = np.array([2], dtype='int64')
            output = paddle.tensor.math.increment(input, value=1)
            self.assertEqual((output.numpy() == expected_result).all(), True)
Example #5
    def build_P_hat_paddle(self, C, P):
        F = self.F
        eps = self.eps
        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
        # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
        P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
        C_tile = paddle.unsqueeze(C, axis=0)  # 1 x F x 2
        P_diff = P_tile - C_tile  # n x F x 2
        # rbf_norm: n x F
        rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)

        # rbf: n x F
        rbf = paddle.multiply(paddle.square(rbf_norm),
                              paddle.log(rbf_norm + eps))
        P_hat = paddle.concat([paddle.ones((n, 1), dtype='float64'), P, rbf],
                              axis=1)
        return P_hat  # n x F+3
Example #6
    def func_test_backward_success_2(self):
        # Although var_b is modified in place after it is used, its value is not needed
        # for gradient computation, so the in-place operation does not corrupt the gradients.
        with paddle.fluid.dygraph.guard():
            var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
            var_a.stop_gradient = False

            var_b = var_a**2

            var_b[1:2] = 3  # var_b is modified inplace before using it

            var_c = var_b + var_b  # the grad op of add does not use the value of var_b
            loss = var_c.sum()

            var_b[1:2] = 3  # var_b is modified inplace after using it

            loss.backward()
Example #7
    def forward(self,
                src,
                spatial_shapes,
                src_mask=None,
                pos_embed=None,
                valid_ratios=None):
        output = src
        if valid_ratios is None:
            valid_ratios = paddle.ones(
                [src.shape[0], spatial_shapes.shape[0], 2])
        reference_points = self.get_reference_points(spatial_shapes,
                                                     valid_ratios)
        for layer in self.layers:
            output = layer(output, reference_points, spatial_shapes, src_mask,
                           pos_embed)

        return output
Example #8
    def test_inverse(self):
        exe = paddle.static.Executor()
        sp = paddle.static.Program()
        mp = paddle.static.Program()
        with paddle.static.program_guard(mp, sp):
            x = paddle.ones(self.out_event_shape)
            t = transform.ReshapeTransform(self.in_event_shape,
                                           self.out_event_shape)
            output = self._t.inverse(x)
        exe.run(sp)
        [output] = exe.run(mp, feed={}, fetch_list=[output])
        expected = np.ones(self.in_event_shape)

        np.testing.assert_allclose(output,
                                   expected,
                                   rtol=config.RTOL.get(str(expected.dtype)),
                                   atol=config.ATOL.get(str(expected.dtype)))
Example #9
    def force_decoding(self, beam_search_output, beam_search_state, trg_word,
                       trg_length, time):
        batch_size = paddle.shape(beam_search_output.predicted_ids)[0]
        beam_size = paddle.shape(beam_search_output.predicted_ids)[1]

        ids_dtype = beam_search_output.predicted_ids.dtype
        scores_dtype = beam_search_output.scores.dtype
        parent_ids = paddle.zeros(shape=[batch_size, 1], dtype=ids_dtype)
        scores = paddle.ones(shape=[batch_size, beam_size],
                             dtype=scores_dtype) * -10e9
        scores = paddle.scatter(
            scores.flatten(),
            paddle.arange(0,
                          batch_size * beam_size,
                          step=beam_size,
                          dtype=scores_dtype),
            paddle.zeros([batch_size])).reshape([batch_size, beam_size])

        force_position = paddle.unsqueeze(trg_length > time, [1])
        # NOTE: when the data type of the input to paddle.tile is bool and
        # static mode is enabled, its stop_gradient must be True.
        force_position.stop_gradient = True
        force_position = paddle.tile(force_position, [1, beam_size])
        crt_trg_word = paddle.slice(trg_word,
                                    axes=[1],
                                    starts=[time],
                                    ends=[time + 1])
        crt_trg_word = paddle.tile(crt_trg_word, [1, beam_size])

        predicted_ids = paddle.where(force_position, crt_trg_word,
                                     beam_search_output.predicted_ids)
        scores = paddle.where(force_position, scores,
                              beam_search_output.scores)
        parent_ids = paddle.where(force_position, parent_ids,
                                  beam_search_output.parent_ids)

        cell_states = beam_search_state.cell_states
        log_probs = paddle.where(force_position, scores,
                                 beam_search_state.log_probs)
        finished = beam_search_state.finished
        lengths = beam_search_state.lengths

        return self.OutputWrapper(scores, predicted_ids,
                                  parent_ids), self.StateWrapper(
                                      cell_states, log_probs, finished,
                                      lengths)
Example #10
    def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            matcher: module able to compute a matching between targets and proposals
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            eos_coef: relative classification weight applied to the no-object category
            losses: list of all the losses to be applied. See get_loss for list of available losses.
        """
        super().__init__()
        self.num_classes = num_classes
        self.matcher = matcher
        self.weight_dict = weight_dict
        self.eos_coef = eos_coef
        self.losses = losses
        # Paddle tensors use stop_gradient instead of torch's requires_grad_
        empty_weight = paddle.ones([self.num_classes + 1])
        empty_weight.stop_gradient = True
        empty_weight[-1] = self.eos_coef
        self.register_buffer('empty_weight', empty_weight)
Example #11
    def _rel_shift(self, x, zero_triu=False):
        x_shape = x.shape
        zero_pad = paddle.zeros(
            [x_shape[0], x_shape[1], x_shape[2], 1], dtype=x.dtype)
        x_padded = paddle.concat([zero_pad, x], axis=-1)

        x_padded = paddle.reshape(
            x_padded,
            shape=[x_shape[0], x_shape[1], x_shape[3] + 1, x_shape[2]])

        x = paddle.reshape(x_padded[:, :, 1:, :], shape=x_shape)

        if zero_triu:
            ones = paddle.ones([x_shape[2], x_shape[3]])
            x = x * paddle.tril(
                ones, diagonal=x_shape[3] - x_shape[2]).unsqueeze([2, 3])

        return x
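
The pad/reshape/slice sequence above is the relative-position shift used in Transformer-XL style attention. A standalone sketch on a small tensor (restating the method body outside the class, as an illustration only) makes the shape bookkeeping easier to follow:

import paddle

# Sketch only: same zero-pad -> reshape -> slice sequence as _rel_shift above.
x = paddle.arange(24, dtype="float32").reshape([1, 1, 4, 6])
zero_pad = paddle.zeros([1, 1, 4, 1], dtype=x.dtype)
x_padded = paddle.concat([zero_pad, x], axis=-1)         # shape [1, 1, 4, 7]
x_padded = paddle.reshape(x_padded, shape=[1, 1, 7, 4])  # swap the last two dims
shifted = paddle.reshape(x_padded[:, :, 1:, :], shape=x.shape)
print(shifted.shape)  # [1, 1, 4, 6], same shape as the input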
Example #12
def cosine(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a window with a simple cosine shape.
    Parameters:
        M(int): window size.
        sym(bool): whether to return a symmetric window.
            The default value is True.
        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.cosine().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + .5))

    return _truncate(w, needs_trunc)
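
As a usage sketch (assuming the same module-level helpers as above are available), the sym flag switches between the symmetric window commonly used for filter design and the periodic variant used for spectral analysis:

# Sketch: symmetric vs. periodic variants, mirroring scipy.signal.windows.cosine.
w_sym = cosine(8)             # symmetric window
w_per = cosine(8, sym=False)  # periodic window
print(w_sym.shape, w_per.shape)  # both [8]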
Example #13
def partial_trace_discontiguous(rho, preserve_qubits=None):
    r"""计算量子态的偏迹,可选取任意子系统。

    Args:
        rho (Tensor): 输入的量子态
        preserve_qubits (list): 要保留的量子比特,默认为 None,表示全保留
    """
    if preserve_qubits is None:
        return rho
    else:
        n = int(log2(rho.size) // 2)
        num_preserve = len(preserve_qubits)

        shape = paddle.ones((n + 1, ))
        shape = 2 * shape
        shape[n] = 2**n
        shape = paddle.cast(shape, "int32")
        identity = paddle.eye(2**n)
        identity = paddle.reshape(identity, shape=shape)
        discard = list()
        for idx in range(0, n):
            if idx not in preserve_qubits:
                discard.append(idx)
        addition = [n]
        preserve_qubits.sort()

        preserve_qubits = paddle.to_tensor(preserve_qubits)
        discard = paddle.to_tensor(discard)
        addition = paddle.to_tensor(addition)
        permute = paddle.concat([discard, preserve_qubits, addition])

        identity = paddle.transpose(identity, perm=permute)
        identity = paddle.reshape(identity, (2**n, 2**n))

        result = np.zeros((2**num_preserve, 2**num_preserve),
                          dtype="complex64")
        result = paddle.to_tensor(result)

        for i in range(0, 2**num_preserve):
            bra = identity[i * 2**num_preserve:(i + 1) * 2**num_preserve, :]
            result = result + matmul(matmul(bra, rho),
                                     transpose(bra, perm=[1, 0]))

        return result
Example #14
    def forward(self, x, lengths=None):
        C, L = x.shape[1], x.shape[2]  # KP: (N, C, L)

        def _compute_statistics(x, m, axis=2, eps=self.eps):
            mean = (m * x).sum(axis)
            std = paddle.sqrt(
                (m * (x - mean.unsqueeze(axis)).pow(2)).sum(axis).clip(eps))
            return mean, std

        if lengths is None:
            lengths = paddle.ones([x.shape[0]])

        # Make binary mask of shape [N, 1, L]
        mask = length_to_mask(lengths * L, max_len=L)
        mask = mask.unsqueeze(1)

        # Expand the temporal context of the pooling layer by allowing the
        # self-attention to look at global properties of the utterance.
        if self.global_context:
            total = mask.sum(axis=2, keepdim=True).astype('float32')
            mean, std = _compute_statistics(x, mask / total)
            mean = mean.unsqueeze(2).tile((1, 1, L))
            std = std.unsqueeze(2).tile((1, 1, L))
            attn = paddle.concat([x, mean, std], axis=1)
        else:
            attn = x

        # Apply layers
        attn = self.conv(self.tanh(self.tdnn(attn)))

        # Filter out zero-paddings
        attn = paddle.where(
            mask.tile((1, C, 1)) == 0,
            paddle.ones_like(attn) * float("-inf"), attn)

        attn = F.softmax(attn, axis=2)
        mean, std = _compute_statistics(x, attn)

        # Append mean and std of the batch
        pooled_stats = paddle.concat((mean, std), axis=1)
        pooled_stats = pooled_stats.unsqueeze(2)

        return pooled_stats
Example #15
    def test_multiple_gpus(self):
        dist.init_parallel_env()
        self.trainer_id = dist.get_rank()

        model_a = SimpleNet(self.trainer_id)
        model_b = SimpleNet(self.trainer_id)

        state_dict = model_a.state_dict()
        model_b.set_state_dict(state_dict)

        model_a = paddle.DataParallel(model_a, find_unused_parameters=True)
        model_b = paddle.DataParallel(model_b, find_unused_parameters=True)

        ones_input = paddle.ones(shape=(batch, in_dim))
        ones_input.stop_gradient = True

        w1_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')
        w2_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')

        for step_id in range(5):
            random_input = paddle.rand(shape=(batch, in_dim))
            random_input.stop_gradient = True

            if step_id % 2 == 0:
                out_a = model_a(random_input)
                out_b = model_b(random_input)
            else:
                out_a = model_a(ones_input)
                out_b = model_b(ones_input)

            out_a.sum().backward()
            out_b.sum().backward()

            self.check_gradient(model_a.parameters())
            self.check_gradient(model_b.parameters())

            # test acc gradient
            w1_grad_sum = self.check_acc(model_a._layers.w1.grad, w1_grad_sum,
                                         model_b._layers.w1.grad)
            w2_grad_sum = self.check_acc(model_a._layers.w2.grad, w2_grad_sum,
                                         model_b._layers.w2.grad)

            model_a.clear_gradients()
Example #16
def var(x, axis=None, unbiased=True, keepdim=False, name=None):
    """
    Computes the variance of ``x`` along ``axis`` .

    Args:
        x (Tensor): The input Tensor with data type float32, float64.
        axis (int|list|tuple, optional): The axis along which to perform variance calculations. ``axis`` should be int, list(int) or tuple(int). 
        
            - If ``axis`` is a list/tuple of dimension(s), variance is calculated along all element(s) of ``axis`` . ``axis`` or element(s) of ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` . 
            - If ``axis`` or element(s) of ``axis`` is less than 0, it works the same way as :math:`axis + D` . 
            - If ``axis`` is None, variance is calculated over all elements of ``x``. Default is None.

        unbiased (bool, optional): Whether to use the unbiased estimation. If ``unbiased`` is True, the divisor used in the computation is :math:`N - 1`, where :math:`N` represents the number of elements along ``axis`` , otherwise the divisor is :math:`N`. Default is True.
        keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the input unless keepdim is true. Default is False.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of variance along ``axis`` of ``x``, with the same data type as ``x``.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
            out1 = paddle.var(x)
            # [2.66666667]
            out2 = paddle.var(x, axis=1)
            # [1.         4.33333333]
    """
    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'var')

    u = mean(x, axis, True, name)
    out = paddle.sum((x - u)**2, axis, keepdim=keepdim, name=name)

    n = paddle.cast(paddle.numel(x), x.dtype) \
        / paddle.cast(paddle.numel(out), x.dtype)
    if unbiased:
        one_const = paddle.ones([1], x.dtype)
        n = where(n > one_const, n - 1., one_const)
    out /= n
    return out
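
A quick way to sanity-check the unbiased flag is to compare against NumPy's ddof convention; this is only a sketch, not part of the API above.

import numpy as np
import paddle

x_np = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]], dtype="float32")
x = paddle.to_tensor(x_np)
# unbiased=True (default) divides by N - 1, matching ddof=1
np.testing.assert_allclose(paddle.var(x, axis=1).numpy(),
                           x_np.var(axis=1, ddof=1), rtol=1e-5)
# unbiased=False divides by N, matching ddof=0
np.testing.assert_allclose(paddle.var(x, axis=1, unbiased=False).numpy(),
                           x_np.var(axis=1, ddof=0), rtol=1e-5)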
Example #17
    def testLoad(self):
        model = hub.load(
            self.local_repo, model='MM', source='local', out_channels=8)

        data = paddle.rand((1, 3, 100, 100))
        out = model(data)
        np.testing.assert_equal(out.shape, [1, 8, 50, 50])

        model = hub.load(
            self.github_repo, model='MM', source='github', force_reload=True)

        model = hub.load(
            self.github_repo,
            model='MM',
            source='github',
            force_reload=False,
            pretrained=False)

        model = hub.load(
            self.github_repo.split(':')[0],
            model='MM',
            source='github',
            force_reload=False,
            pretrained=False)

        model = hub.load(
            self.github_repo,
            model='MM',
            source='github',
            force_reload=False,
            pretrained=True,
            out_channels=8)

        data = paddle.ones((1, 3, 2, 2))
        out = model(data)

        gt = np.array([
            1.53965068, 0., 0., 1.39455748, 0.72066200, 0.19773030, 2.09201908,
            0.37345418
        ])
        np.testing.assert_equal(out.shape, [1, 8, 1, 1])
        np.testing.assert_almost_equal(
            out.numpy(), gt.reshape(1, 8, 1, 1), decimal=5)
Example #18
    def build_program(self):
        start_prog = paddle.static.Program()
        main_prog = paddle.static.Program()

        with paddle.static.program_guard(main_prog, start_prog):
            x = paddle.static.data('x', shape=[2, 2])
            x.stop_gradient = False
            y = x * x

            v = paddle.ones([2, 2])
            v.stop_gradient = False

            grad_y = paddle.zeros_like(y)
            grad_y.stop_gradient = False
            grad_x = paddle.static.gradients(y, x, grad_y)
            # test with single targets
            jvp = paddle.static.gradients(grad_x, grad_y, v)

        return start_prog, main_prog, [grad_x, jvp]
Example #19
    def test_dim4(self):
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        expected_np = np.array([[[[0, 3], [2, 2], [2, 2]],
                                 [[2, 2], [1, 4], [2, 2]],
                                 [[2, 2], [2, 2], [2, 5]],
                                 [[2, 2], [2, 2], [2, 2]]],
                                [[[6, 9], [2, 2], [2, 2]],
                                 [[2, 2], [7, 10], [2, 2]],
                                 [[2, 2], [2, 2], [8, 11]],
                                 [[2, 2], [2, 2], [2, 2]]]]).astype('float32')
        expected_grad = np.array([[[[0, 0], [1, 1], [1, 1]],
                                   [[1, 1], [0, 0], [1, 1]],
                                   [[1, 1], [1, 1], [0, 0]],
                                   [[1, 1], [1, 1], [1, 1]]],
                                  [[[0, 0], [1, 1], [1, 1]],
                                   [[1, 1], [0, 0], [1, 1]],
                                   [[1, 1], [1, 1], [0, 0]],
                                   [[1, 1], [1, 1], [1, 1]]]]).astype('float32')

        for idx, p in enumerate(self.places):
            if idx == 0:
                paddle.set_device('cpu')
            else:
                paddle.set_device('gpu')
            for dtype in self.typelist:
                v = paddle.to_tensor(np.arange(12).reshape(2, 2, 3),
                                     dtype=dtype)
                var = (np.random.random() + 1)
                x = paddle.ones((2, 4, 3, 2), dtype=dtype)
                x.stop_gradient = False
                y = x * 2
                ny = y.fill_diagonal_tensor(v, offset=0, dim1=1, dim2=2)
                loss = ny.sum()
                loss.backward()

                self.assertEqual(
                    (ny.numpy().astype('float32') == expected_np).all(), True)
                self.assertEqual(
                    (y.grad.numpy().astype('float32') == expected_grad).all(),
                    True)
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
Example #20
    def forward(self,
                input_ids,
                position_ids=None,
                attention_mask=None,
                use_cache=False,
                cache=None):
        self.checkpoints = []
        if position_ids is None:
            past_length = 0
            if cache is not None:
                past_length = paddle.shape(cache[0].k)[-2]
            position_ids = paddle.arange(past_length,
                                         paddle.shape(input_ids)[-1] +
                                         past_length,
                                         dtype='int64')
            position_ids = position_ids.unsqueeze(0)
            # .expand_as(input_ids)
            position_ids = paddle.fluid.layers.expand_as(
                position_ids, input_ids)
        embedding_output = self.embeddings(input_ids=input_ids,
                                           position_ids=position_ids)

        # TODO, use registered buffer
        causal_mask = paddle.tensor.triu(paddle.ones(
            (paddle.shape(input_ids)[-1], paddle.shape(input_ids)[-1])) * -1e9,
                                         diagonal=1)

        if attention_mask is not None:
            attention_mask = attention_mask + causal_mask
        else:
            attention_mask = causal_mask

        # The tensor returned by triu is a constant mask and is not part of the gradient computation.
        attention_mask.stop_gradient = True

        encoder_outputs = self.decoder(embedding_output,
                                       memory=None,
                                       tgt_mask=attention_mask,
                                       use_cache=use_cache,
                                       cache=cache)
        self.checkpoints.extend(self.decoder.checkpoints)
        return encoder_outputs
Example #21
def future_mask(time_steps, dtype="bool"):
    """Generate lower triangular mask.
    
    It is used in the transformer decoder to prevent the decoder from seeing
    future information.

    Parameters
    ----------
    time_steps : int
        Decoder time steps.
    dtype : str, optional
        The data type of the generated mask, by default "bool".

    Returns
    -------
    Tensor
        The generated mask.
    """
    mask = paddle.tril(paddle.ones([time_steps, time_steps]))
    return paddle.cast(mask, dtype)
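
A short usage sketch: for four decoder steps the mask is lower triangular, so step t can only attend to steps 0..t.

import paddle

mask = future_mask(4, dtype="float32")
print(mask.numpy())
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]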
Example #22
def limit_by_capacity(topk_idx, num_expert, world_size, capacity, group=None):
    with paddle.no_grad():
        capacity = paddle.ones(shape=[num_expert],
                               dtype=paddle.int64) * capacity
        pos, lec, gec = count_by_gate(topk_idx,
                                      num_expert,
                                      world_size,
                                      require_pos=False,
                                      group=group)
        new_gec = _limit_by_capacity(gec, capacity, world_size)
        if world_size > 1:
            assert group.nranks == world_size
            new_lec = _alltoall(new_gec, group=group)
        else:
            new_lec = new_gec

        topk_idx = _prune_gate_by_capacity(topk_idx, new_lec, num_expert,
                                           world_size)

    return new_lec, new_gec, topk_idx
Example #23
    def func_bool(self):
        expected_np = np.array([[False, True, True], [True, False, True],
                                [True, True, False]])

        typelist = ['bool']
        places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for idx, p in enumerate(places):
            if idx == 0:
                paddle.set_device('cpu')
            else:
                paddle.set_device('gpu')
            for dtype in typelist:
                x = paddle.ones((3, 3), dtype=dtype)
                x.stop_gradient = True
                x.fill_diagonal_(0, offset=0, wrap=True)

                self.assertEqual((x.numpy() == expected_np).all(), True)
Example #24
def export_model(net, cfg, save_dir):
    net.forward = paddle.jit.to_static(net.forward)
    input_shape = [1] + list(cfg.val_dataset[0][0].shape)
    input_var = paddle.ones(input_shape)
    out = net(input_var)

    save_path = os.path.join(save_dir, 'model')
    paddle.jit.save(net, save_path, input_spec=[input_var])

    yml_file = os.path.join(save_dir, 'deploy.yaml')
    with open(yml_file, 'w') as file:
        transforms = cfg.dic['val_dataset']['transforms']
        data = {
            'Deploy': {
                'transforms': transforms,
                'model': 'model.pdmodel',
                'params': 'model.pdiparams'
            }
        }
        yaml.dump(data, file)
Example #25
    def func_test_backward_error(self):
        # It raises an error because the inplace operator will result
        # in incorrect gradient computation.
        with paddle.fluid.dygraph.guard():
            var_a = paddle.ones(shape=self.input_shape, dtype="float32")
            var_a.stop_gradient = False

            var_b = var_a**2

            # Here, the gradient computation will use the value of var_b
            var_c = var_b**2
            view_var_b = self.view_api_processing(var_b)
            view_var_b[0] = 2.  # var_b is modified inplace

            loss = paddle.nn.functional.relu(var_c)
            with self.assertRaisesRegexp(
                    RuntimeError,
                    "received tensor_version:{} != wrapper_version_snapshot:{}"
                    .format(1, 0)):
                loss.backward()
Example #26
    def forward(self, h, x, mess_graph):
        """forward"""
        mask = paddle.ones([h.shape[0], 1])
        mask[0] = 0
        for it in range(self.depth):
            h_nei = index_select_ND(h, 0, mess_graph)
            sum_h = paddle.sum(h_nei, axis=1)
            z_input = paddle.concat([x, sum_h], axis=1)
            z = F.sigmoid(self.W_z(z_input))

            r_1 = paddle.reshape(self.W_r(x), shape=[-1, 1, self.hidden_size])
            r_2 = self.U_r(h_nei)
            r = F.sigmoid(r_1 + r_2)

            gated_h = r * h_nei
            sum_gated_h = paddle.sum(gated_h, axis=1)
            h_input = paddle.concat([x, sum_gated_h], axis=1)
            pre_h = F.tanh(self.W_h(h_input))
            h = (1.0 - z) * sum_h + z * pre_h
            h = h * mask
        return h
Example #27
    def test_backward_error(self):
        # It raises an error because the inplace operator will result
        # in incorrect gradient computation.
        with paddle.fluid.dygraph.guard():
            var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
            var_a.stop_gradient = False

            var_b = var_a**2

            # Here, the gradient computation will use the value of var_b
            var_c = var_b**2
            var_b[1:2] = 3.3  # var_b is modified inplace after using it

            var_d = var_b**2

            loss = paddle.nn.functional.relu(var_c + var_d)
            with self.assertRaisesRegexp(
                    RuntimeError,
                    "received tensor_version:{} != wrapper_version_snapshot:{}".
                    format(1, 0)):
                loss.backward()
Example #28
def graph_norm(graph, feature):
    """Implementation of graph normalization
   
    Reference Paper: BENCHMARKING GRAPH NEURAL NETWORKS
   
    Each node features is divied by sqrt(num_nodes) per graphs.

    Args:
        graph: the graph object from (:code:`Graph`)

        feature: A tensor with shape (num_nodes, feature_size).

    Return:
        A tensor with shape (num_nodes, hidden_size)
    """

    nodes = paddle.ones(shape=[graph.num_nodes, 1], dtype="float32")
    norm = graph_pool(graph, nodes, pool_type="sum")
    norm = paddle.sqrt(norm)
    norm = paddle.gather(norm, graph.graph_node_id)
    return feature / norm
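
The normalization itself is easy to restate without the pgl-specific pooling and gather plumbing; the following sketch (with a hand-written graph_node_id and node counts, both assumptions for illustration) divides each node's features by the square root of its graph's node count.

import paddle

graph_node_id = paddle.to_tensor([0, 0, 0, 1, 1])  # two graphs with 3 and 2 nodes
feature = paddle.ones([5, 4])
num_nodes = paddle.to_tensor([3.0, 2.0])            # nodes per graph
norm = paddle.sqrt(num_nodes)                       # [1.7320..., 1.4142...]
normalized = feature / paddle.gather(norm, graph_node_id).unsqueeze(-1)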
Example #29
    def test_api(self):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            input0 = fluid.layers.fill_constant(
                shape=[2, 3], dtype='int64', value=5)
            input1 = fluid.layers.fill_constant(
                shape=[2, 3], dtype='int64', value=3)
            expected_result = np.empty((2, 3))
            expected_result.fill(8)
            sum_value = paddle.add_n([input0, input1])
            exe = fluid.Executor(fluid.CPUPlace())
            result = exe.run(fetch_list=[sum_value])

            self.assertEqual((result == expected_result).all(), True)

        with fluid.dygraph.guard():
            input0 = paddle.ones(shape=[2, 3], dtype='float32')
            expected_result = np.empty((2, 3))
            expected_result.fill(2)
            sum_value = paddle.add_n([input0, input0])

            self.assertEqual((sum_value.numpy() == expected_result).all(), True)
Example #30
    def __init__(self,
                 block,
                 layers,
                 num_classes=1000,
                 zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2D(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias_attr=False)
        self.bn1 = nn.BatchNorm2D(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2D((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                m.weight_attr = paddle.ParamAttr(
                    initializer=nn.initializer.KaimingNormal())
            elif isinstance(m, nn.BatchNorm2D):
                m.weight.set_value(paddle.ones(m.weight.shape))
                m.bias.set_value(paddle.zeros(m.bias.shape))

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.sublayers():
                if isinstance(m, Bottleneck):
                    m.bn3.weight.set_value(paddle.zeros(m.bn3.weight.shape))
                elif isinstance(m, BasicBlock):
                    m.bn2.weight.set_value(paddle.zeros(m.bn2.weight.shape))