Example #1
    def execute(self, pc1, pc2, reduction='mean', dims='BNC'):
        assert dims in ['BNC', 'BCN']
        if dims == 'BCN':
            pc1, pc2 = pc1.permute(0, 2, 1), pc2.permute(0, 2, 1)

        batch_size_1, N, _ = pc1.shape
        batch_size_2, M, _ = pc2.shape
        assert batch_size_1 == batch_size_2
        batch_size = batch_size_1

        temp = jt.zeros([batch_size, (N + M) * 2], pc1.dtype)
        match = jt.code(
            shape=[batch_size, M, N],
            dtype=pc1.dtype,
            inputs=[pc1, pc2, temp],
            cuda_header=EMD_gpu_header,
            cuda_src=approxmatch_gpu_src,
        )

        emd = jt.code(
            shape=[batch_size],
            dtype=pc1.dtype,
            inputs=[pc1, pc2, match],
            cuda_header=EMD_gpu_header,
            cuda_src=matchcost_gpu_src,
        )

        self.saved_vars = (pc1, pc2, match, reduction)

        if reduction is None:
            return emd
        elif reduction == 'sum':
            return emd.sum()
        elif reduction == 'mean':
            return emd.mean()
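
A minimal usage sketch for the forward pass above, assuming it belongs to a jt.Function subclass (called EMD here; the class name and the CUDA source strings it references are assumptions, only execute() is shown):

import jittor as jt
jt.flags.use_cuda = True         # the EMD kernels above are CUDA-only

pc1 = jt.rand([8, 1024, 3])      # [batch, n_points, xyz] for dims='BNC'
pc2 = jt.rand([8, 1024, 3])
emd = EMD()                      # hypothetical Function wrapping execute()/grad()
loss = emd(pc1, pc2, reduction='mean', dims='BNC')
print(loss.item())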
Example #2
 def test_error_msg_trace_py_var(self):
     a = jt.array([3, 2, 1])
     b = jt.code(a.shape,
                 a.dtype, [a],
                 cpu_header="""
             #include <algorithm>
             @alias(a, in0)
             @alias(b, out)
         """,
                 cpu_src="""
             for (int i=0; i<a_shape0; i++)
                 @b(i) = @a(i);
             std::sort(&@b(0), &@b(in0_shape0));
             throw std::runtime_error("???");
         """)
     msg = ""
     try:
         print(b)
     except Exception as e:
         msg = str(e)
     print(msg)
     assert "[Reason]: ???" in msg
     assert "[Input]: int32[3,]" in msg
     assert "[OP TYPE]: code" in msg
     assert "[Async Backtrace]:" in msg
     assert "test_error_msg.py:" in msg
Example #3
def mask_prob_cuda(embed_pixel, embed_center, sigma_center, boxes, box_areas,
                   area_sum, mask_width):
    assert embed_pixel.ndim == 2, "embed_pixel should be MxDim"
    assert embed_center.ndim == 2, "embed_center should be NxDim"
    assert sigma_center.ndim == 1, "sigma_center should be N"
    assert embed_pixel.shape[1] == embed_center.shape[1], "Dim should be the same"
    assert embed_center.shape[0] == sigma_center.shape[0], "center number should be the same"
    assert embed_center.shape[0] == boxes.shape[0], "center number and box number should be the same"

    output_shape = (embed_pixel.shape[0], embed_center.shape[0])
    if output_shape[0] * output_shape[1] == 0:
        return jt.array([], embed_pixel.dtype)
    output_type = embed_pixel.dtype
    option = jt.empty((0, ))
    option.compile_options = {
        "area_sum": int(area_sum),
        "mask_width": int(mask_width)
    }
    inputs = [
        embed_pixel, embed_center, sigma_center, boxes, box_areas, option
    ]
    output = jt.code(output_shape,
                     output_type,
                     inputs,
                     cuda_header=CUDA_HEADER,
                     cuda_src=CUDA_SRC)
    return output
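
A hedged call sketch based only on the shape asserts above (CUDA_HEADER and CUDA_SRC are defined elsewhere in that module; the sizes and the 4-column box layout are illustrative assumptions):

import jittor as jt
jt.flags.use_cuda = True

M, N, D = 4096, 16, 32                      # pixels, centers, embedding dim (arbitrary)
embed_pixel  = jt.rand([M, D])              # MxDim
embed_center = jt.rand([N, D])              # NxDim
sigma_center = jt.rand([N])                 # N
boxes        = jt.rand([N, 4])              # one box per center (layout assumed)
box_areas    = jt.rand([N])
probs = mask_prob_cuda(embed_pixel, embed_center, sigma_center,
                       boxes, box_areas, area_sum=1000, mask_width=28)
print(probs.shape)                          # (M, N), per output_shape above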
Example #4
    def execute(self, x):
        '''
        Parameters
        ----------
        x: jt.Var, (B, N, 3)

        Returns
        -------
        y: jt.Var, (B, n_samples, 3)
        '''
        batch_size, n_points, n_coords = x.shape

        assert self.n_samples <= n_points
        assert n_coords == 3
        assert x.dtype == 'float32'

        block_size = optimal_block(batch_size)

        cuda_src = self.cuda_src.replace('#block_size', str(block_size))

        idxs_shape = [batch_size, self.n_samples]
        idxs = jt.code(idxs_shape, 'int32', [x,], cuda_src=cuda_src)
        
        y = x.reindex([batch_size, self.n_samples, 3], [
            'i0',               # Batchid
            '@e0(i0, i1)',      # Nid
            'i2'
        ], extras=[idxs])

        return y
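
A hedged usage sketch, assuming the execute() above lives in a farthest-point-sampling module constructed with n_samples (the wrapper class name and the CUDA source it holds are assumptions; only the docstring shapes are relied on):

import jittor as jt
jt.flags.use_cuda = True

sampler = FarthestPointSampler(n_samples=512)   # hypothetical wrapper holding self.cuda_src
x = jt.rand([4, 2048, 3])                       # (B, N, 3), float32 as asserted above
y = sampler(x)
print(y.shape)                                  # expected (4, 512, 3)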
Example #5
 def grad(self, grad_x):
     x = self.save_vars
     return jt.code(x.shape, x.dtype, [x, grad_x],
         cpu_src='''
             for (int i=0; i<in0_shape0; i++)
                 @out(i) = @in1(i)*@in0(i)*4;
         ''')
Example #6
    def test_type(self):
        import numpy as np
        assert str(jt.NanoString(float)) == "float"
        assert str(jt.NanoString(np.float)) == "float"  # np.float requires NumPy < 1.24 (the alias was removed)
        assert str(jt.NanoString(np.float32)) == "float32"
        assert str(jt.NanoString(np.float64)) == "float64"
        assert str(jt.NanoString(np.int8)) == "int8"
        assert str(jt.NanoString(np.array([1, 2, 3]).dtype)) == "int64"

        assert str(jt.NanoString(jt.float)) == "float"
        assert str(jt.NanoString(jt.float32)) == "float32"
        assert str(jt.NanoString(jt.float64)) == "float64"
        assert str(jt.NanoString(jt.int8)) == "int8"
        assert str(jt.NanoString(jt.array([1, 2, 3]).dtype)) == "int64"
        assert str(jt.NanoString(jt.sum)) == "add"

        def get_error_str(call):
            es = ""
            try:
                call()
            except Exception as e:
                es = str(e)
            return es

        e = get_error_str(lambda: jt.code([
            1,
        ], {}, [1], cpu_header=""))
        assert "help(jt.ops.code)" in e
        assert "cpu_header=str" in e
        e = get_error_str(lambda: jt.NanoString([1, 2, 3], f**k=1))
        assert "f**k=int" in str(e)
        assert "(list, )" in str(e)
Example #7
 def execute(self, x):
     self.save_vars = x
     return jt.code(x.shape, x.dtype, [x],
         cpu_src='''
             for (int i=0; i<in0_shape0; i++)
                 @out(i) = @in0(i)*@in0(i)*2;
         ''')
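
Examples #5 and #7 are the two halves of one custom operator; a self-contained sketch putting them together through jittor's Function interface (the class name MyFunc is illustrative):

import jittor as jt
from jittor import Function

class MyFunc(Function):
    def execute(self, x):
        self.save_vars = x
        return jt.code(x.shape, x.dtype, [x], cpu_src='''
            for (int i=0; i<in0_shape0; i++)
                @out(i) = @in0(i)*@in0(i)*2;
        ''')

    def grad(self, grad_x):
        x = self.save_vars
        return jt.code(x.shape, x.dtype, [x, grad_x], cpu_src='''
            for (int i=0; i<in0_shape0; i++)
                @out(i) = @in1(i)*@in0(i)*4;
        ''')

x = jt.array([1.0, 2.0, 3.0])
y = MyFunc()(x)          # forward: 2*x^2 -> [2, 8, 18]
dx, = jt.grad(y, [x])    # jt.grad seeds with ones, so the custom grad gives 4*x -> [4, 8, 12]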
Example #8
def chamfer_loss(pc1, pc2, reduction='mean', sqrt=True):
    '''
    Return the chamfer loss from pc1 to pc2.

    Parameters:
    ===========
        pc1:  [B, N, xyz]
        pc2:  [B, M, xyz]
        reduction: 'mean', 'sum', or None
        sqrt: if True, return the euclidean (rather than squared) distance
    '''
    batch_size_1, n_samples_pc1, _ = pc1.shape
    batch_size_2, n_samples_pc2, _ = pc2.shape

    assert batch_size_1 == batch_size_2
    batch_size = batch_size_1

    idx = jt.code([batch_size, n_samples_pc1],
                  'int32', [pc1, pc2],
                  cpu_src=cpu_src,
                  cuda_src=cuda_src)

    nearest_pts = select_vertices(pc2, idx)
    if sqrt:
        chamfer_distance = (((pc1 - nearest_pts)**2).sum(dim=-1)).sqrt()
    else:
        chamfer_distance = (((pc1 - nearest_pts)**2).sum(dim=-1))

    if reduction is None:
        return chamfer_distance
    elif reduction == 'sum':
        return jt.sum(chamfer_distance)
    elif reduction == 'mean':
        return jt.mean(chamfer_distance)
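
A hedged usage sketch for this chamfer_loss (the cpu_src/cuda_src kernel strings and the select_vertices helper it relies on are defined elsewhere in that module):

import jittor as jt

pc1 = jt.rand([10, 100, 3])    # [B, N, xyz]
pc2 = jt.rand([10, 100, 3])    # [B, M, xyz]
cd = chamfer_loss(pc1, pc2, reduction='mean', sqrt=True)
print(cd.item())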
Example #9
 def test_cuda2(self):
     a = jt.random((100,100))
     b = jt.random((100,100))
     c = jt.code(a.shape, a.dtype, [a,b],
         cuda_src='''
             __global__ static void kernel1(@ARGS_DEF) {
                 @PRECALC
                 for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                 for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                     @out(i,j) = @in0(i,j)*@in1(i,j);
             }
             kernel1<<<32, 32>>>(@ARGS);
         ''',
         cuda_grad_src = ['''
             __global__ static void kernel(@ARGS_DEF) {
                 @PRECALC
                 for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                 for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                     @out(i,j) = @dout(i,j)*@in1(i,j);
             }
             kernel<<<32, 32>>>(@ARGS);
         ''', '''
             __global__ static void kernel(@ARGS_DEF) {
                 @PRECALC
                 @pout(0,0);
                 for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                 for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                     @out(i,j) = @dout(i,j)*@in0(i,j);
             }
             kernel<<<32, 32>>>(@ARGS);
         '''])
     da, db = jt.grad(c, [a, b])
     assert np.allclose(c.data, a.data*b.data), (c.data, a.data*b.data)
     assert np.allclose(da.data, b.data)
     assert np.allclose(db.data, a.data)
Example #10
def searchsorted(sorted, values, right=False):
    """
    For each element of `values`, find the index in the innermost dimension of `sorted` at which it would need to be inserted to keep `sorted` sorted.

Example::

    sorted = jt.array([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]])
    values = jt.array([[3, 6, 9], [3, 6, 9]])
    ret = jt.searchsorted(sorted, values)
    assert (ret == [[1, 3, 4], [1, 2, 4]]).all(), ret

    ret = jt.searchsorted(sorted, values, right=True)
    assert (ret == [[2, 3, 5], [1, 3, 4]]).all(), ret
    
    sorted_1d = jt.array([1, 3, 5, 7, 9])
    ret = jt.searchsorted(sorted_1d, values)
    assert (ret == [[1, 3, 4], [1, 3, 4]]).all(), ret


    """
    _searchsorted_header = f"""
namespace jittor {{

@python.jittor.auto_parallel(2)
inline static void searchsorted(
    int batch_num, int batch_id, int value_num, int value_id,
    int sorted_num, int batch_stride,
    {sorted.dtype}* __restrict__  sort_p, {values.dtype}* __restrict__  value_p, 
    int32* __restrict__ index_p) {{
    int32 l = batch_id * batch_stride;
    int32 r = l + sorted_num;
    auto v = value_p[batch_id * value_num + value_id];
    while (l<r) {{
        int32 m = (l+r)/2;
        if (sort_p[m] {"<=" if right else "<"} v)
            l = m+1;
        else
            r = m;
    }}
    index_p[batch_id * value_num + value_id] = l - batch_id * batch_stride;
}}

}}
"""
    _searchsorted_src = """
    int value_num = in1->shape[in1->shape.size()-1];
    int sorted_num = in0->shape[in0->shape.size()-1];
    int32 batch_num = in0->num / sorted_num;
    int32 batch_num2 = in1->num / value_num;
    int32 batch_stride = batch_num == 1 ? 0 : sorted_num;
    CHECK(batch_num == batch_num2 || batch_num == 1);

    searchsorted(batch_num2, 0, value_num, 0, sorted_num, batch_stride, in0_p, in1_p, out0_p);
"""
    return jt.code(values.shape,
                   "int32", [sorted, values],
                   cpu_header=_searchsorted_header,
                   cpu_src=_searchsorted_src,
                   cuda_header=_searchsorted_header,
                   cuda_src=_searchsorted_src)
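
For the 1-D sorted case the result should agree with numpy.searchsorted, which gives a quick cross-check (a sketch, assuming NumPy is available):

import numpy as np
import jittor as jt

s = jt.array([1, 3, 5, 7, 9])
v = jt.array([[3, 6, 9], [3, 6, 9]])
assert (searchsorted(s, v).data == np.searchsorted(s.data, v.data)).all()
assert (searchsorted(s, v, right=True).data ==
        np.searchsorted(s.data, v.data, side='right')).all()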
Example #11
def roi_pool(input, rois, output_size, spatial_scale):
    output_size = _pair(output_size)
    spatial_scale = jt.array([spatial_scale])
    output_shapes = [(rois.shape[0], input.shape[1], output_size[0], output_size[1])] * 2
    inputs = [input, rois, spatial_scale]
    output_types = [input.dtype, 'int32']
    output, arg_output = jt.code(output_shapes, output_types, inputs,
                                 cuda_header=CUDA_HEADER, cuda_src=CUDA_SRC,
                                 cuda_grad_src=CUDA_GRAD_SRC)
    return output
Example #12
 def test_array_migrate(self):
     with jt.flag_scope(use_cuda=1):
         a = jt.array(np.float32([1,2,3]))
         b = jt.code(a.shape, a.dtype, [a], cpu_src="""
             for (int i=0; i<in0shape0; i++)
                 @out(i) = @in0(i)*@in0(i)*2;
         """)
         assert (b.data==[2,8,18]).all()
Example #13
 def test_parallel(self):
     a = jt.code([4],
                 "int",
                 cpu_src="""
         #pragma omp parallel num_threads(4)
         @out(omp_get_thread_num()) = 456;
     """,
                 cpu_header='#include <omp.h>').data
     assert (a == [456] * 4).all(), a
Example #14
 def test_return_multi_output(self):
     a = jt.array([3, 2, 1])
     b = jt.array([1, 2])
     c = jt.array([3, 4, 5, 6])
     jt.code([a], [b, c],
             cpu_src="""
             @alias(a, in0)
             @alias(b, out0)
             @alias(c, out1)
             for (int i=0; i<a_shape0; i++) {
                 if (i<b_shape0) @b(i) += @a(i);
                 if (i<c_shape0) @c(i) += @a(i);
             }
         """)
     assert b.shape == [2]
     assert c.shape == [4]
     assert (b.data == [4, 4]).all()
     assert (c.data[:3] == [6, 6, 6]).all()
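
The test above relies on jt.code's in-place form, where existing Vars are passed as outputs and modified; a minimal single-output sketch of the same pattern:

import jittor as jt

x = jt.array([1, 2, 3])
y = jt.zeros([3], x.dtype)
jt.code([x], [y], cpu_src='''
    @alias(x, in0)
    @alias(y, out0)
    for (int i=0; i<x_shape0; i++)
        @y(i) = @x(i) * 10;
''')
assert (y.data == [10, 20, 30]).all()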
Example #15
def chamfer_loss(pc1, pc2, reduction='mean', dims='BNC', bidirectional=False):
    ''' return the chamfer loss from pc1 to pc2.

    :param pc1:  input point cloud
    :type pc1: jittor array

    :param pc2:  input point cloud
    :type pc2: jittor array

    :param reduction: reduction method in batches, can be 'mean', 'sum', or None. Default: 'mean'.
    :type reduction: str, optional
            
    :param dims: a string that represents each dimension, can be
            'BNC' ([batch, number of points, xyz]), or
            'BCN' ([batch, xyz, number of points]). Default: 'BNC'.
    :type dims: str, optional

    Example:

    >>> import jittor as jt
    >>> from jittor.loss3d import chamfer_loss
    >>> jt.flags.use_cuda = True
    >>> pc1 = jt.rand([10, 100, 3], dtype=jt.float32)
    >>> pc2 = jt.rand([10, 100, 3], dtype=jt.float32)
    >>> cf = chamfer_loss(pc1, pc2, dims='BNC', bidirectional=True)
    >>> print('chamfer loss =', cf.item())
    '''
    if bidirectional:
        return chamfer_loss(pc1, pc2, reduction, dims) + chamfer_loss(
            pc2, pc1, reduction, dims)

    assert dims in ['BNC', 'BCN']
    if dims == 'BCN':
        pc1, pc2 = pc1.permute(0, 2, 1), pc2.permute(0, 2, 1)

    batch_size_1, N, _ = pc1.shape
    batch_size_2, M, _ = pc2.shape
    assert batch_size_1 == batch_size_2
    batch_size = batch_size_1

    idx = jt.code([batch_size, N],
                  'int32', [pc1, pc2],
                  cpu_src=cpu_src,
                  cuda_src=cuda_src)

    nearest_pts = pc2.reindex([batch_size, idx.shape[1], 3],
                              ['i0', '@e0(i0, i1)', 'i2'],
                              extras=[idx])

    chamfer_distance = (((pc1 - nearest_pts)**2).sum(dim=-1)).sqrt()
    if reduction is None:
        return chamfer_distance
    elif reduction == 'sum':
        return jt.sum(chamfer_distance)
    elif reduction == 'mean':
        return jt.mean(chamfer_distance)
Example #16
 def test_device_allocator(self):
     a = jt.array([1, 2, 3, 4, 5])
     b = a + 1
     c = jt.code(a.shape,
                 a.dtype, [b],
                 cpu_src="""
         for (int i=0; i<in0_shape0; i++)
             @out(i) = @in0(i)*@in0(i)*2;
     """)
     assert (c.data == [8, 18, 32, 50, 72]).all()
Example #17
 def test_header(self):
     a = jt.array([3, 2, 1])
     b = jt.code(a.shape,
                 a.dtype, [a],
                 header='#include <algorithm>',
                 cpu_src="""
             for (int i=0; i<in0shape0; i++)
                 @out(i) = @in0(i);
             std::sort(&@out(0), &@out(in0shape0));
         """)
     assert (b.data == [1, 2, 3]).all()
Example #18
 def execute(self, a, b):
     self.save_vars = a, b
     return jt.code(a.shape, a.dtype, [a,b],
         cuda_src='''
             __global__ static void kernel1(@ARGS_DEF) {
                 @PRECALC
                 for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                 for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
                     @out(i,j) = @in0(i,j)*@in1(i,j);
             }
             kernel1<<<32, 32>>>(@ARGS);
         ''')
Example #19
def simple_presum(x):
    src = '''
__inline_static__
@python.jittor.auto_parallel(1)
void kernel(int n0, int i0, in0_type* x, in0_type* out, int nl) {
    out[i0*(nl+1)] = 0;
    for (int i=0; i<nl; i++)
        out[i0*(nl+1)+i+1] = out[i0*(nl+1)+i] + x[i0*(nl+1)+i];
}
kernel(in0->num/in0->shape[in0->shape.size()-1], 0, in0_p, out0_p, in0->num);
    '''
    return jt.code(x.shape[:-1]+(x.shape[-1]+1,), x.dtype, [x],
        cpu_src=src, cuda_src=src)
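
A hedged 1-D usage sketch: the output has one extra leading element, so out[i] holds the sum of the first i inputs:

import jittor as jt

x = jt.array([1.0, 2.0, 3.0, 4.0])
y = simple_presum(x)
print(y.data)    # expected [0., 1., 3., 6., 10.]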
Example #20
 def execute(self, x_q, x_r): # n_points, c_dim
     batch_size, c_dim, q_points = x_q.shape 
     batch_size, c_dim, r_points = x_r.shape 
     out_idx_shapes = [batch_size, self.k, q_points]
     tmp_dist = jt.empty((batch_size, r_points, q_points), "float32")
     idxs,  = jt.code(
         [out_idx_shapes],
         ['int32'],
         [x_r, x_q, tmp_dist], # in0 r point in1 q point 
         cuda_src=self.cuda_src,
         cuda_header=self.cuda_inc,
     )
     return idxs
Example #21
    def grad(self, grad):
        x, numangle, numrho = self.save_vars
        n, c, h, w = x.shape  # recover h and w (used for irho below), as in execute() of Example #29
        cuda_src_backward = csb.replace('#numangle', str(numangle))
        cuda_src_backward = cuda_src_backward.replace('#numrho', str(numrho))

        irho = int((h * h + w * w)**0.5 + 1) / float((numrho - 1))
        itheta = 3.14159265358979323846 / numangle
        angle = jt.arange(numangle) * itheta
        tabCos = angle.cos() / irho
        tabSin = angle.sin() / irho

        return jt.code([x.shape], [x.dtype], [x, grad, tabCos, tabSin],
                       cuda_src=cuda_src_backward)
Example #22
 def grad(self, grad):
     a, b = self.save_vars
     return jt.code([a.shape, b.shape], [a.dtype, b.dtype], [a, b, grad],
         cuda_src='''
             __global__ static void kernel2(@ARGS_DEF) {
                 @PRECALC
                 for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
                 for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x) {
                     @out0(i,j) = @in2(i,j)*@in1(i,j);
                     @out1(i,j) = @in2(i,j)*@in0(i,j);
                 }
             }
             kernel2<<<32, 32>>>(@ARGS);
         ''')
Example #23
    def grad(self, grad):
        pc1, pc2, match, reduction = self.saved_vars

        if reduction == 'sum':
            grad = jt.ones([pc1.shape[0]]) * grad
        elif reduction == 'mean':
            grad = jt.ones([pc1.shape[0]]) * grad / pc1.shape[0]

        grad_pc1 = jt.code(
            shape=pc1.shape,
            dtype=pc1.dtype,
            inputs=[grad, pc1, pc2, match],
            cuda_src=matchcost_grad1_gpu_src,
        )

        grad_pc2 = jt.code(
            shape=pc2.shape,
            dtype=pc2.dtype,
            inputs=[grad, pc1, pc2, match],
            cuda_src=matchcost_grad2_gpu_src,
        )

        return grad_pc1, grad_pc2
Example #24
 def test_header(self):
     a = jt.array([3, 2, 1])
     b = jt.code(a.shape,
                 a.dtype, [a],
                 cpu_header="""
             #include <algorithm>
             @alias(a, in0)
             @alias(b, out)
         """,
                 cpu_src="""
             for (int i=0; i<a_shape0; i++)
                 @b(i) = @a(i);
             std::sort(&@b(0), &@b(in0_shape0));
         """)
     assert (b.data == [1, 2, 3]).all()
Example #25
    def execute(self,featuremap, boxes, box_ind):
        """
        RoIAlign based on crop_and_resize.
        See more details on https://github.com/longcw/RoIAlign.pytorch
        :param featuremap: NxCxHxW
        :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization**
        :param box_ind: M
        :return: MxCxoHxoW
        """
        x1, y1, x2, y2 = [boxes.reindex([boxes.shape[0],1], ["i0", str(i)]) for i in range(4)]
        image_height, image_width = featuremap.shape[2:4]

        if self.transform_fpcoor:
            spacing_w = (x2 - x1) / float(self.crop_width)
            spacing_h = (y2 - y1) / float(self.crop_height)

            nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1)
            ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1)
            nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1)
            nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1)

            boxes = jt.contrib.concat((ny0, nx0, ny0 + nh, nx0 + nw), 1)
        else:
            x1 = x1 / float(image_width - 1)
            x2 = x2 / float(image_width - 1)
            y1 = y1 / float(image_height - 1)
            y2 = y2 / float(image_height - 1)
            boxes = jt.contrib.concat((y1, x1, y2, x2), 1)

        num_boxes = boxes.shape[0]
        depth = featuremap.shape[1]

        output_shapes = (num_boxes, depth, self.crop_height, self.crop_width)
        output_types = featuremap.dtype
        extrapolation_value = jt.array([self.extrapolation_value])
        inputs = [featuremap, boxes, box_ind, extrapolation_value]
        cpu_header = ROIALIGN_CPU_HEADER
        cpu_src = ROIALIGN_CPU_SRC
        cpu_grad_src = ROIALIGN_CPU_GRAD_SRC

        cuda_header = ROIALIGN_CUDA_HEADER
        cuda_src = ROIALIGN_CUDA_SRC
        cuda_grad_src = ROIALIGN_CUDA_GRAD_SRC

        output = jt.code(output_shapes, output_types, inputs,
                         cpu_header=cpu_header, cpu_src=cpu_src, cpu_grad_src=cpu_grad_src,
                         cuda_header=cuda_header, cuda_src=cuda_src, cuda_grad_src=cuda_grad_src)

        return output
Example #26
def roi_align(input, rois, output_size, spatial_scale, sampling_ratio):
    output_size = _pair(output_size)
    options = jt.array([spatial_scale, sampling_ratio])
    output_shapes = (rois.shape[0], input.shape[1], output_size[0],
                     output_size[1])
    inputs = [input, rois, options]
    output_types = input.dtype
    if rois.shape[0] == 0:
        return jt.zeros(output_shapes, input.dtype)
    output = jt.code(output_shapes,
                     output_types,
                     inputs,
                     cuda_header=CUDA_HEADER,
                     cuda_src=CUDA_SRC,
                     cuda_grad_src=CUDA_GRAD_SRC)
    return output
Example #27
    def test_cuda(self):
        a = jt.random([100000])
        b = jt.random([100000])
        c = jt.code(a.shape,
                    a.dtype, [a, b],
                    cuda_header='''
            namespace jittor {
            __global__ static void kernel1(@ARGS_DEF) {
                @PRECALC
                int i = threadIdx.x + blockIdx.x * blockDim.x;
                int stride = blockDim.x * gridDim.x;
                for (int i=0; i<in0shape0; i++)
                    @out(i) = @in0(i)*@in1(i);
            }

            __global__ static void kernel2(@ARGS_DEF) {
                @PRECALC
                int i = threadIdx.x + blockIdx.x * blockDim.x;
                int stride = blockDim.x * gridDim.x;
                for (int i=0; i<in0shape0; i++)
                    @out(i) = @dout(i)*@in1(i);
            }

            __global__ static void kernel3(@ARGS_DEF) {
                @PRECALC
                int i = threadIdx.x + blockIdx.x * blockDim.x;
                int stride = blockDim.x * gridDim.x;
                for (int i=0; i<in0shape0; i++)
                    @out(i) = @dout(i)*@in0(i);
            }

            }
            ''',
                    cuda_src='''
                kernel1<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
            ''',
                    cuda_grad_src=[
                        '''
                kernel2<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
            ''', '''
                kernel3<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
            '''
                    ])
        da, db = jt.grad(c, [a, b])
        assert np.allclose(c.data, a.data * b.data), (c.data, a.data * b.data)
        assert np.allclose(da.data, b.data)
        assert np.allclose(db.data, a.data)
Example #28
def argmax_pool(x, size, stride, padding=0):
    y_shape = list(x.shape)
    y_shape[2] = (x.shape[2] + padding * 2 - size) // stride + 1
    y_shape[3] = (x.shape[3] + padding * 2 - size) // stride + 1

    y = jt.code(y_shape,
                x.dtype, [x],
                cpu_src=f'''
            for (int i=0; i<out_shape0; i++)
            for (int j=0; j<out_shape1; j++)
            for (int k=0; k<out_shape2; k++)
            for (int l=0; l<out_shape3; l++) {{
                int kx=k*{stride}+{size}/2-{padding};
                int ky=l*{stride}+{size}/2-{padding};
                @out(i,j,k,l) = @in0(i,j,kx,ky);
                for (int p=kx-{size}/2;p<=kx+{size}/2;p++)
                for (int q=ky-{size}/2;q<=ky+{size}/2;q++)
                    if (p>=0 && q>=0 && p<in0_shape2 && q<in0_shape3)
                    if (@out(i,j,k,l) < @in0(i,j,p,q))
                        @out(i,j,k,l) = @in0(i,j,p,q);
            }}
        ''',
                cpu_grad_src=[
                    f'''
            for (int i=0; i<out_shape0; i++)
            for (int j=0; j<out_shape1; j++)
            for (int k=0; k<out_shape2; k++)
            for (int l=0; l<out_shape3; l++) @out(i,j,k,l) = 0;

            for (int i=0; i<pout_shape0; i++)
            for (int j=0; j<pout_shape1; j++)
            for (int k=0; k<pout_shape2; k++)
            for (int l=0; l<pout_shape3; l++) {{
                int kx=k*{stride}+{size}/2-{padding};
                int ky=l*{stride}+{size}/2-{padding};
                int bo=1;
                for (int p=kx-{size}/2;p<=kx+{size}/2 && bo;p++)
                for (int q=ky-{size}/2;q<=ky+{size}/2 && bo;q++)
                    if (p>=0 && q>=0 && p<in0_shape2 && q<in0_shape3)
                    if (@pout(i,j,k,l) == @in0(i,j,p,q)) {{
                        @out(i,j,p,q) += @dout(i,j,k,l);
                        bo=0;
                    }}
            }}
        '''
                ])
    return y
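
A usage sketch for the pooling above (CPU path only, since only cpu_src/cpu_grad_src are provided):

import jittor as jt

x = jt.random([2, 3, 8, 8])                      # NCHW input
y = argmax_pool(x, size=3, stride=2, padding=1)  # 3x3 max pooling, stride 2
print(y.shape)                                   # expected [2, 3, 4, 4]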
Example #29
    def execute(self, x, numangle, numrho):
        n, c, h, w = x.shape
        cuda_src_forward = csf.replace('#numangle', str(numangle))
        cuda_src_forward = cuda_src_forward.replace('#numrho', str(numrho))

        irho = int((h * h + w * w)**0.5 + 1) / float((numrho - 1))
        itheta = 3.14159265358979323846 / numangle
        angle = jt.arange(numangle) * itheta
        tabCos = angle.cos() / irho
        tabSin = angle.sin() / irho

        output = jt.code([n, c, numangle, numrho],
                         x.dtype, [x, tabCos, tabSin],
                         cuda_src=cuda_src_forward)

        self.save_vars = x, numangle, numrho
        return output
Example #30
 def test_multi_output(self):
     a = jt.array([3, 2, 1])
     b, c = jt.code([[2], [4]], ["float32", "float64"], [a],
                    cpu_src="""
             @alias(a, in0)
             @alias(b, out0)
             @alias(c, out1)
             for (int i=0; i<a_shape0; i++) {
                 if (i<b_shape0) @b(i) = @a(i);
                 if (i<c_shape0) @c(i) = @a(i);
             }
         """)
     assert b.shape == [2]
     assert c.shape == [4]
     assert b.dtype == "float32"
     assert c.dtype == "float64"
     assert (b.data == [3, 2]).all()
     assert (c.data[:3] == [3, 2, 1]).all()