Example #1
def _correlate_or_convolve(input, weights, output, mode, cval, origin,
                           convolution):
    if input.dtype.kind == 'c':
        raise TypeError('Complex type not supported.')
    if not hasattr(origin, '__getitem__'):
        origin = [
            origin,
        ] * input.ndim
    else:
        origin = list(origin)
    wshape = [ii for ii in weights.shape if ii > 0]
    if len(wshape) != input.ndim:
        raise RuntimeError('filter weights array has incorrect shape.')
    if convolution:
        weights = weights[tuple([slice(None, None, -1)] * weights.ndim)]
        for ii in range(len(origin)):
            origin[ii] = -origin[ii]
            if weights.shape[ii] % 2 == 0:
                origin[ii] -= 1
    for _origin, lenw in zip(origin, wshape):
        if (lenw // 2 + _origin < 0) or (lenw // 2 + _origin >= lenw):
            raise ValueError('invalid origin')
    if mode not in ('reflect', 'constant', 'nearest', 'mirror', 'wrap'):
        msg = 'boundary mode not supported (actual: {}).'.format(mode)
        raise RuntimeError(msg)

    output = _get_output(output, input)
    if weights.size == 0:
        return output
    input = cupy.ascontiguousarray(input)
    weights = cupy.ascontiguousarray(weights, cupy.float64)
    return _get_correlete_kernel(input.ndim, mode, cval, input.shape,
                                 tuple(wshape), tuple(origin))(input, weights,
                                                               output)
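The convolution branch above relies on the identity that convolution is correlation with the weights reversed along every axis. A quick NumPy/SciPy check of that identity (a standalone sketch, not part of the CuPy source; an odd-sized kernel is used so no origin shift is needed):

import numpy as np
from scipy import ndimage

x = np.random.rand(6, 7)
w = np.random.rand(3, 3)
# same reversal trick as weights[tuple([slice(None, None, -1)] * ndim)]
flipped = w[tuple([slice(None, None, -1)] * w.ndim)]
assert np.allclose(ndimage.convolve(x, w), ndimage.correlate(x, flipped))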
Example #2
    def slice(self, slice):
        res = Sparse4DData()
        res.indices = cp.ascontiguousarray(self.indices[slice])
        res.counts = cp.ascontiguousarray(self.counts[slice])
        res.scan_dimensions = np.array(res.counts.shape[:2])
        res.frame_dimensions = self.frame_dimensions.copy()
        return res
Example #3
def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    set0 = set(sub0)
    set1 = set(sub1)
    assert len(set0) == len(sub0), 'operand 0 should be reduced: diagonal'
    assert len(set1) == len(sub1), 'operand 1 should be reduced: diagonal'

    if len(sub0) == 0 or len(sub1) == 0:
        return arr0 * arr1, sub0 + sub1

    set_others = set(sub_others)
    shared = set0 & set1
    batch_dims = shared & set_others
    contract_dims = shared - batch_dims

    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)

    sub_b = [sub0[axis] for axis in bs0]
    assert sub_b == [sub1[axis] for axis in bs1]
    sub_l = [sub0[axis] for axis in ts0]
    sub_r = [sub1[axis] for axis in ts1]

    sub_out = sub_b + sub_l + sub_r
    assert set(sub_out) <= set_others, 'operands should be reduced: unary sum'

    if len(contract_dims) == 0:
        # Use element-wise multiply when no contraction is needed
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        arr0 = _expand_dims_transpose(arr0, sub0, sub_out)
        arr1 = _expand_dims_transpose(arr1, sub1, sub_out)
        return arr0 * arr1, sub_out

    for accelerator in _accelerator.get_routine_accelerators():
        if accelerator == _accelerator.ACCELERATOR_CUTENSOR:
            if _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1, batch_dims,
                             contract_dims):
                if len(sub_out) == len(sub_others):
                    # to assure final output of einsum is C-contiguous
                    sub_out = sub_others
                out_shape = _get_out_shape(arr0.shape, sub0, arr1.shape, sub1,
                                           sub_out)
                arr_out = cupy.empty(out_shape, arr0.dtype)
                arr0 = cupy.ascontiguousarray(arr0)
                arr1 = cupy.ascontiguousarray(arr1)
                desc_0 = cutensor.create_tensor_descriptor(arr0)
                desc_1 = cutensor.create_tensor_descriptor(arr1)
                desc_out = cutensor.create_tensor_descriptor(arr_out)
                arr_out = cutensor.contraction(1.0, arr0, desc_0, sub0, arr1,
                                               desc_1, sub1, 0.0, arr_out,
                                               desc_out, sub_out)
                return arr_out, sub_out

    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
    assert shapes0[0] == shapes1[0]
    arr_out = cupy.matmul(tmp0, tmp1).reshape(shapes_out)
    return arr_out, sub_out
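When no accelerator handles the contraction, the fallback above transposes each operand into (batch, left, contract) and (batch, contract, right) layouts, flattens them, and performs a single matmul. A NumPy sketch of that reduction (illustrative only; the subscripts here are chosen for the demo):

import numpy as np

arr0 = np.random.rand(3, 2, 4)   # sub0 = 'ibk': left i, batch b, contract k
arr1 = np.random.rand(2, 4, 5)   # sub1 = 'bkj': batch b, contract k, right j
tmp0 = arr0.transpose(1, 0, 2)   # -> (batch, left, contract)
tmp1 = arr1                      # already (batch, contract, right)
out = np.matmul(tmp0, tmp1)      # (batch, left, right) = sub_b + sub_l + sub_r
assert np.allclose(out, np.einsum('ibk,bkj->bij', arr0, arr1))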
Example #4
def nn_gpu(ref, query):
    import math
    import cupy

    with open(cu_file) as f:
        kernel = cupy.RawKernel(f.read(), "cuComputeDistanceGlobal")

    ref_nb, ref_dim = ref.shape
    query_nb, query_dim = query.shape
    assert ref_dim == query_dim
    dim = ref_dim

    ref = ref.transpose(1, 0)
    query = query.transpose(1, 0)
    ref = cupy.ascontiguousarray(ref)
    query = cupy.ascontiguousarray(query)

    dist = cupy.empty((ref_nb, query_nb), dtype=cupy.float32)

    BLOCK_DIM = 16
    grid = (
        int(math.ceil(query_nb / BLOCK_DIM)),
        int(math.ceil(ref_nb / BLOCK_DIM)),
        1,
    )
    block = (BLOCK_DIM, BLOCK_DIM, 1)
    args = (ref, ref_nb, query, query_nb, dim, dist)
    shared_mem = BLOCK_DIM * BLOCK_DIM + BLOCK_DIM * BLOCK_DIM + 5

    kernel(grid, block, args=args, shared_mem=shared_mem)

    indices = cupy.argmin(dist, axis=0)
    return indices
Example #5
    def set_hooke_vertical(self, hooke, div_hooke=None):
        """Sets a hooke tensor, using a vertical approximation where suitable."""
        # Project, reconstruct, and compute difference
        vertical = self._to_vertical(hooke)
        # Find where the vertical approximation is suitable
        reconstr = self._from_vertical(vertical)
        error2 = ((reconstr - hooke)**2).sum(axis=(0, 1))
        reconstr = None
        norm2 = (hooke**2).sum(axis=(0, 1))
        rel_error = np.sqrt(error2 / norm2)
        norm2, error2 = None, None
        self._vertical = self.block_expand(vertical)
        vertical = None
        full = rel_error > self.vertical_thres  # where to use the full hooke tensors
        rel_error = None
        full_ratio = full.sum() / np.prod(self.shape)
        if full_ratio >= 0.3:
            print(
                f"Performance warning : proportion {full_ratio} of "
                "Hooke tensors cannot be handled by the vertical approximation"
            )
        self._full_index = self.block_expand(
            np.flatnonzero(full).astype(np.int32))
        full_hooke = hooke[:, :, full]
        full = None
        weights, offsets = VoronoiDecomposition(full_hooke, offset_t=np.int8)
        self._full_weights = cp.ascontiguousarray(np.moveaxis(weights, 0, 1))
        weights = None
        self._full_offsets = cp.ascontiguousarray(
            np.moveaxis(self._compress_offsets(offsets), 0, 1))
        offsets = None

        assert False  # __TODO__ : compute div_hooke
Example #6
def fast_iterative_method(self, data):
	"""
	Applies (a variant of) the fast iterative method.
	"""
	updateNext_o = fd.block_expand(data.trigger,self.shape_i,mode='constant',
		constant_values=False).reshape(self.shape_o+(-1,)).any(axis=-1)
	updateNext_o = cp.ascontiguousarray(updateNext_o.astype(np.uint8))
	scorePrev_o = cp.zeros(self.shape_o, dtype='uint8')
	scoreNext_o = scorePrev_o.copy()
	policy = data.policy
	nitermax_o = policy.nitermax_o
	stop = self.InitStop(data)

	# strict_iter_o needed
	for niter_o in range(nitermax_o):
		if stop(updateNext_o): return niter_o
		updateList_o = cp.ascontiguousarray(cp.flatnonzero(updateNext_o), dtype=self.int_t)
		scorePrev_o,scoreNext_o = scoreNext_o,scorePrev_o
		updateNext_o.fill(0); scoreNext_o.fill(0)
		data.kernel((updateList_o.size,),(self.size_i,), 
			KernelArgs(data) + (updateList_o,scorePrev_o,scoreNext_o,updateNext_o))
#		print("------------- scorePrev_o,scoreNext_o,updateNext_o -------------------")
#		print(scorePrev_o)
#		print(scoreNext_o)
#		print(updateNext_o)

	return nitermax_o
Example #7
def activation_backward(x, y, gy, mode):
    x = cupy.ascontiguousarray(x)
    gy = cupy.ascontiguousarray(gy)

    gx = cupy.empty_like(x)
    dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=dtype).ctypes
    zero = numpy.array(0, dtype=dtype).ctypes
    handle = get_handle()
    y_mat = _as4darray(y)
    desc = create_tensor_descriptor(y_mat)
    if _cudnn_version >= 4000:
        act_desc = Descriptor(cudnn.createActivationDescriptor(),
                              cudnn.destroyActivationDescriptor)
        cudnn.setActivationDescriptor(
            act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0)
        cudnn.activationBackward_v4(
            handle, act_desc.value, one.data, desc.value, y.data.ptr,
            desc.value, gy.data.ptr, desc.value, x.data.ptr,
            zero.data, desc.value, gx.data.ptr)
    else:
        cudnn.activationBackward_v3(
            handle, mode, one.data, desc.value, y.data.ptr,
            desc.value, gy.data.ptr, desc.value, x.data.ptr,
            zero.data, desc.value, gx.data.ptr)
    return gx
Example #8
    def backward_gpu(self, inputs, gradients):
        # data_in: [bs, nf, ...]
        # indices: [bs, is, is]
        # data_out: [bs, is, is, ..]
        data_in_shape = inputs[0].shape
        indices = cp.ascontiguousarray(inputs[1])
        grad_out = cp.ascontiguousarray(gradients[0])
        grad_in = cp.ascontiguousarray(cp.zeros(data_in_shape, 'float32'))
        chainer.cuda.elementwise(
            'raw float32 grad_in, int32 index, raw float32 grad_out',
            '',
            string.Template('''
                if (index < 0) return;

                int bn = i / (${image_size} * ${image_size});
                int pos_from = bn * ${num_features} * ${dim} + index * ${dim};
                int pos_to = i * ${dim};
                float* p1 = (float*)&grad_in[pos_from];
                float* p2 = (float*)&grad_out[pos_to];
                for (int j = 0; j < ${dim}; j++) atomicAdd(p1++, *p2++);
            ''').substitute(
                image_size=indices.shape[1],
                num_features=data_in_shape[1],
                dim=functools.reduce(lambda x, y: x * y, data_in_shape[2:]),
            ),
            'function',
        )(
            grad_in,
            indices,
            grad_out,
        )
        return grad_in, None
Example #9
    def forward(ctx, data, masks, default_value):
        # PyTorch to CuPy
        device = data.device
        data_in = cp.asarray(data)
        masks = cp.asarray(masks)
        data_out = data_in.copy()
        # integer division: dim is substituted as an int into the kernel below
        dim = data_in.size // masks.size

        # distribute
        masks = cp.ascontiguousarray(masks)
        data_out = cp.ascontiguousarray(data_out)
        kernel = cp.ElementwiseKernel(
            'raw S data_out, int64 mask',
            '',
            string.Template('''
                if (mask == 0) {
                    ${dtype}* p = (${dtype}*)&data_out[i * ${dim}];
                    for (int j = 0; j < ${dim}; j++) *p++ = ${default_value};
                }
            ''').substitute(
                dim=dim,
                dtype=utils.get_dtype_in_cuda(data_out.dtype),
                default_value=default_value,
            ),
            'function',
        )
        kernel(data_out, masks)

        # CuPy to PyTorch
        data_out = torch.as_tensor(data_out, device=device)

        return data_out
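The PyTorch-to-CuPy handoff above is zero-copy in both directions: cp.asarray consumes a CUDA tensor through __cuda_array_interface__, and torch.as_tensor wraps the CuPy array the same way. A minimal round-trip sketch (assumes a CUDA device with both cupy and torch available):

import cupy as cp
import torch

t = torch.arange(6, dtype=torch.float32, device="cuda").reshape(2, 3)
a = cp.asarray(t)            # shares the same device buffer, no copy
a *= 2                       # the mutation is visible from the torch side
back = torch.as_tensor(a, device=t.device)
assert torch.equal(back, t)  # both names alias one allocation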
Example #10
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A', plan=None,
          overwrite_x=False, out=None):
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".'
                         % norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes)
    if not axes_sorted:
        return a
    a = _convert_dtype(a, value_type)

    if order == 'A':
        if a.flags.f_contiguous:
            order = 'F'
        elif a.flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    if order == 'C' and not a.flags.c_contiguous:
        a = cupy.ascontiguousarray(a)
    elif order == 'F' and not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)
    a = _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
                   overwrite_x=overwrite_x, plan=plan, out=out)
    return a
Example #11
def activation_backward(x, y, gy, mode):
    x = cupy.ascontiguousarray(x)
    gy = cupy.ascontiguousarray(gy)

    gx = cupy.empty_like(x)
    dtype = "d" if x.dtype == "d" else "f"
    one = numpy.array(1, dtype=dtype).ctypes
    zero = numpy.array(0, dtype=dtype).ctypes
    handle = get_handle()
    y_mat = _as4darray(y)
    desc = create_tensor_descriptor(y_mat)
    cudnn.activationBackward_v3(
        handle,
        mode,
        one.data,
        desc.value,
        y.data.ptr,
        desc.value,
        gy.data.ptr,
        desc.value,
        x.data.ptr,
        zero.data,
        desc.value,
        gx.data.ptr,
    )
    return gx
Example #12
def activation_backward(x, y, gy, mode):
    x = cupy.ascontiguousarray(x)
    gy = cupy.ascontiguousarray(gy)

    gx = cupy.empty_like(x)
    dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=dtype).ctypes
    zero = numpy.array(0, dtype=dtype).ctypes
    handle = get_handle()
    y_mat = _as4darray(y)
    desc = create_tensor_descriptor(y_mat)
    if _cudnn_version >= 4000:
        act_desc = Descriptor(cudnn.createActivationDescriptor(),
                              cudnn.destroyActivationDescriptor)
        cudnn.setActivationDescriptor(
            act_desc.value, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, 0.0)
        cudnn.activationBackward_v4(
            handle, act_desc.value, one.data, desc.value, y.data.ptr,
            desc.value, gy.data.ptr, desc.value, x.data.ptr,
            zero.data, desc.value, gx.data.ptr)
    else:
        cudnn.activationBackward_v3(
            handle, mode, one.data, desc.value, y.data.ptr,
            desc.value, gy.data.ptr, desc.value, x.data.ptr,
            zero.data, desc.value, gx.data.ptr)
    return gx
Example #13
    def backward(self, indexes, gx):
        torch_y_grad = [
            torch.zeros(*gx[i].shape,
                        device=torch.device("cuda:" + str(self.gpu)),
                        requires_grad=False,
                        dtype=torch.float32).contiguous()
            for i in range(len(gx))
        ]
        for i in range(len(gx)):
            if gx[i] is not None:
                datacopy(torch_y_grad[i].data.data_ptr(),
                         cupy.ascontiguousarray(gx[i].data),
                         torch_y_grad[i].numel())

        for i in range(len(gx)):
            if gx[i] is not None:
                self.torch_y[i].backward(torch_y_grad[i])

        x_grad = [
            cupy.ascontiguousarray(
                cupy.zeros(tuple(self.torch_x[indexes[i]].grad.shape),
                           dtype=cupy.float32)) for i in range(len(indexes))
        ]
        for i in range(len(indexes)):
            datacopy(x_grad[i],
                     self.torch_x[indexes[i]].grad.contiguous().data_ptr(),
                     x_grad[i].size)

        # Double backprop is not supported for now
        x_grad = [chainer.Variable(x, requires_grad=False) for x in x_grad]
        return tuple(x_grad)
Example #14
def _fftn(a,
          s,
          axes,
          norm,
          direction,
          value_type='C2C',
          order='A',
          plan=None,
          overwrite_x=False,
          out=None):
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".' %
                         norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if not axes_sorted:
        if value_type == 'C2C':
            return a
        else:
            raise IndexError('list index out of range')
    a = _convert_dtype(a, value_type)

    if order == 'A':
        if a.flags.f_contiguous:
            order = 'F'
        elif a.flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    for n in a.shape:
        if n < 1:
            raise ValueError(
                'Invalid number of FFT data points (%d) specified.' % n)

    if order == 'C' and not a.flags.c_contiguous:
        a = cupy.ascontiguousarray(a)
    elif order == 'F' and not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    # _cook_shape tells us input shape only, and not output shape
    out_size = _get_fftn_out_size(a.shape, s, axes_sorted[-1], value_type)

    a = _exec_fftn(a,
                   direction,
                   value_type,
                   norm=norm,
                   axes=axes_sorted,
                   overwrite_x=overwrite_x,
                   plan=plan,
                   out=out,
                   out_size=out_size)
    return a
Example #15
def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None):
    """
    Convert the input obj - if possible - to a cuGraph Graph-type obj (Graph,
    DiGraph, etc.) and return a tuple of (cugraph Graph-type obj, original
    input obj type). If matrix_graph_type is specified, it is used as the
    cugraph Graph-type obj to create when converting from a matrix type.
    """
    # FIXME: importing here to avoid circular import
    from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph
    from cugraph.utilities.nx_factory import convert_from_nx

    input_type = type(obj)
    if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]:
        return (obj, input_type)

    elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]):
        return (convert_from_nx(obj, weight=nx_weight_attr), input_type)

    elif (input_type in CP_MATRIX_TYPES) or (input_type in SP_MATRIX_TYPES):

        if matrix_graph_type is None:
            matrix_graph_type = Graph
        elif matrix_graph_type not in [Graph, DiGraph]:
            raise TypeError(
                f"matrix_graph_type must be either a cugraph "
                f"Graph or DiGraph, got: {matrix_graph_type}"
            )

        if input_type in (
            CP_COMPRESSED_MATRIX_TYPES + SP_COMPRESSED_MATRIX_TYPES
        ):
            coo = obj.tocoo(copy=False)
        else:
            coo = obj

        if input_type in CP_MATRIX_TYPES:
            df = cudf.DataFrame(
                {
                    "source": cp.ascontiguousarray(coo.row),
                    "destination": cp.ascontiguousarray(coo.col),
                    "weight": cp.ascontiguousarray(coo.data),
                }
            )
        else:
            df = cudf.DataFrame(
                {"source": coo.row, "destination": coo.col, "weight": coo.data}
            )
        # FIXME:
        # * do a quick check that symmetry is stored explicitly in the cupy
        #   data for sym matrices (ie. for each uv, check vu is there)
        # * populate the cugraph graph with directed data and set renumbering
        #   to false in from edge list call.
        G = matrix_graph_type()
        G.from_cudf_edgelist(df, edge_attr="weight", renumber=True)

        return (G, input_type)

    else:
        raise TypeError(f"obj of type {input_type} is not supported.")
Example #16
def _fftn(a,
          s,
          axes,
          norm,
          direction,
          value_type='C2C',
          order='A',
          plan=None,
          overwrite_x=False,
          out=None):
    if norm not in (None, 'ortho'):
        raise ValueError('Invalid norm value %s, should be None or "ortho".' %
                         norm)

    a = _convert_dtype(a, value_type)

    if (s is not None) and (axes is not None) and len(s) != len(axes):
        raise ValueError('Shape and axes have different lengths.')

    if axes is None:
        if s is None:
            dim = a.ndim
        else:
            dim = len(s)
        axes = [i for i in six.moves.range(-dim, 0)]
    axes = tuple(axes)

    if order == 'A':
        if a.flags.f_contiguous:
            order = 'F'
        elif a.flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    a = _cook_shape(a, s, axes, value_type, order=order)
    if order == 'C' and not a.flags.c_contiguous:
        a = cupy.ascontiguousarray(a)
    elif order == 'F' and not a.flags.f_contiguous:
        a = cupy.asfortranarray(a)

    # sort the provided axes in ascending order
    axes = tuple(sorted(np.mod(axes, a.ndim)))

    a = _exec_fftn(a,
                   direction,
                   value_type,
                   norm=norm,
                   axes=axes,
                   overwrite_x=overwrite_x,
                   plan=plan,
                   out=out)
    return a
Example #17
def _apply_conv_mode(full, s1, s2, mode, axes):
    # See scipy's documentation in scipy.signal.signaltools
    if mode == 'full':
        return cupy.ascontiguousarray(full)
    if mode == 'valid':
        s1 = [full.shape[a] if a not in axes else s1[a] - s2[a] + 1
              for a in range(full.ndim)]
    starts = [(cur-new)//2 for cur, new in zip(full.shape, s1)]
    slices = tuple(slice(start, start+length)
                   for start, length in zip(starts, s1))
    return cupy.ascontiguousarray(full[slices])
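The slicing rule above can be sanity-checked in one dimension with NumPy: 'full' output has length s1 + s2 - 1, 'same' keeps an s1-length centered window, and 'valid' keeps s1 - s2 + 1 samples (a standalone sketch, not CuPy-specific):

import numpy as np

s1, s2 = 5, 3
full = np.convolve(np.arange(s1), np.ones(s2))  # length s1 + s2 - 1 = 7
start = (len(full) - s1) // 2                   # centered 'same' window
assert np.allclose(full[start:start + s1],
                   np.convolve(np.arange(s1), np.ones(s2), mode='same'))
valid = s1 - s2 + 1                             # 'valid' length rule
start = (len(full) - valid) // 2
assert np.allclose(full[start:start + valid],
                   np.convolve(np.arange(s1), np.ones(s2), mode='valid'))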
Example #18
    def fwd_ptycho(self, psi, prb, scan):
        """Ptychography transform (FQ)."""
        res = cp.zeros([self.nscan, self.ndet, self.ndet], dtype='complex64')
        # convert to C-contiguous arrays if needed
        psi = cp.ascontiguousarray(psi)
        prb = cp.ascontiguousarray(prb)
        scan = cp.ascontiguousarray(scan)
        # run C wrapper
        self.fwd(res.data.ptr, psi.data.ptr,
                 prb.data.ptr, scan.data.ptr, 0)  # igpu = 0
        return res
Example #19
    def adj_ptycho(self, data, prb, scan):
        """Adjoint ptychography transform (Q*F*)."""
        res = cp.zeros([self.nz, self.n], dtype='complex64')
        # convert to C-contiguous arrays if needed
        data = data.copy()
        data = cp.ascontiguousarray(data)
        prb = cp.ascontiguousarray(prb)
        scan = cp.ascontiguousarray(scan)
        # run C wrapper
        self.adj(res.data.ptr, data.data.ptr,
                 prb.data.ptr, scan.data.ptr, 0)  # igpu = 0
        return res
Example #20
    def load_mc(self, t_ij):
        for j, (l, h) in enumerate(
                zip(self.observables.lows, self.observables.highs)):
            in_bounds = np.logical_and(t_ij[:, j] > l, t_ij[:, j] < h)
            t_ij = t_ij[in_bounds]
        self.t_ij = cp.asarray(t_ij)
        self.w_i = cp.ones(t_ij.shape[0])
        if self.bootstrap_binning is not None:
            counts, _ = cp.histogramdd(cp.asarray(self.t_ij),
                                       bins=self.bin_edges,
                                       weights=cp.asarray(self.w_i))
            self.counts = (cp.asarray(counts).flatten() / self.bin_vol /
                           cp.sum(cp.asarray(counts))).reshape(counts.shape)
        self.sigma_j = cp.std(self.t_ij, axis=0)
        self.h_ij = self._adapt_bandwidth()
        for j, (l, h, refl) in enumerate(
                zip(self.observables.lows, self.observables.highs,
                    self.reflect_axes)):
            if not refl:
                continue
            if type(refl) == tuple:
                low, high = refl
                mask = self.t_ij[:, j] < low
                t_ij_reflected_low = cp.copy(self.t_ij[mask, :])
                h_ij_reflected_low = self.h_ij[mask, :]
                w_i_reflected_low = self.w_i[mask]  # w_i is 1-D
                t_ij_reflected_low[:, j] = 2 * l - t_ij_reflected_low[:, j]
                mask = self.t_ij[:, j] > high
                t_ij_reflected_high = cp.copy(self.t_ij[mask, :])
                h_ij_reflected_high = self.h_ij[mask, :]
                w_i_reflected_high = self.w_i[mask]  # w_i is 1-D
                t_ij_reflected_high[:, j] = 2 * h - t_ij_reflected_high[:, j]
            else:
                t_ij_reflected_low = cp.copy(self.t_ij)
                h_ij_reflected_low = self.h_ij
                w_i_reflected_low = self.w_i
                t_ij_reflected_low[:, j] = 2 * l - self.t_ij[:, j]
                t_ij_reflected_high = cp.copy(self.t_ij)
                h_ij_reflected_high = self.h_ij
                w_i_reflected_high = self.w_i
                t_ij_reflected_high[:, j] = 2 * h - self.t_ij[:, j]
            self.t_ij = cp.concatenate(
                [self.t_ij, t_ij_reflected_low, t_ij_reflected_high])
            self.h_ij = cp.concatenate(
                [self.h_ij, h_ij_reflected_low, h_ij_reflected_high])
            self.w_i = cp.concatenate(
                [self.w_i, w_i_reflected_low, w_i_reflected_high])
        self.t_ij = cp.ascontiguousarray(self.t_ij)
        self.h_ij = cp.ascontiguousarray(self.h_ij)
        self.w_i = cp.ascontiguousarray(self.w_i)
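The reflection loop above implements boundary correction for the kernel density estimate: mirroring samples across each bound restores the kernel mass that would otherwise leak outside the domain. A 1-D NumPy sketch of the effect (standalone, with unit-weight samples on [0, 1]):

import numpy as np

rng = np.random.default_rng(0)
t = rng.uniform(0.0, 1.0, 1000)                          # true density is 1
low, high = 0.0, 1.0
t_refl = np.concatenate([t, 2 * low - t, 2 * high - t])  # mirror at both edges

def kde(x, samples, h=0.05):
    # normalize by len(t): reflected copies only return leaked mass
    return (np.exp(-0.5 * ((x - samples[:, None]) / h) ** 2).sum(0)
            / (len(t) * h * np.sqrt(2 * np.pi)))

x = np.array([0.0, 0.5, 1.0])
print(kde(x, t))       # ~[0.5, 1.0, 0.5]: naive KDE halves at the edges
print(kde(x, t_refl))  # ~[1.0, 1.0, 1.0]: reflection removes the edge bias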
Example #21
    def __init__(self, data):
        self.table_size = 2**24
        self.hash_factor = 2531011
        self.dim = data.shape[-1]

        self.indices = cp.ascontiguousarray(
            cp.zeros((self.table_size, ), 'int32')) - 1
        self.values = cp.ascontiguousarray(
            cp.zeros((self.table_size, self.dim), 'int32'))
        self.value_list = cp.ascontiguousarray(
            cp.zeros((self.table_size, self.dim), 'int32'))
        self.size = None

        self.init_keys(data)
Example #22
def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    set0 = set(sub0)
    set1 = set(sub1)
    assert len(set0) == len(sub0), 'operand 0 should be reduced: diagonal'
    assert len(set1) == len(sub1), 'operand 1 should be reduced: diagonal'

    if len(sub0) == 0 or len(sub1) == 0:
        return arr0 * arr1, sub0 + sub1

    set_others = set(sub_others)
    shared = set0 & set1
    batch_dims = shared & set_others
    contract_dims = shared - batch_dims

    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)

    sub_b = [sub0[axis] for axis in bs0]
    assert sub_b == [sub1[axis] for axis in bs1]
    sub_l = [sub0[axis] for axis in ts0]
    sub_r = [sub1[axis] for axis in ts1]

    sub_out = sub_b + sub_l + sub_r
    assert set(sub_out) <= set_others, 'operands should be reduced: unary sum'

    if _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1,
                     batch_dims, contract_dims):
        if len(sub_out) == len(sub_others):
            sub_out = sub_others
        out_shape = _get_out_shape(arr0.shape, sub0, arr1.shape, sub1, sub_out)
        arr_out = cupy.empty(out_shape, arr0.dtype)
        arr0 = cupy.ascontiguousarray(arr0)
        arr1 = cupy.ascontiguousarray(arr1)
        desc_0 = cutensor.create_tensor_descriptor(arr0)
        desc_1 = cutensor.create_tensor_descriptor(arr1)
        desc_out = cutensor.create_tensor_descriptor(arr_out)
        arr_out = cutensor.contraction(1.0,
                                       arr0, desc_0, sub0,
                                       arr1, desc_1, sub1,
                                       0.0,
                                       arr_out, desc_out, sub_out)
        return arr_out, sub_out

    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
    assert shapes0[0] == shapes1[0]
    arr_out = cupy.matmul(tmp0, tmp1).reshape(shapes_out)
    return arr_out, sub_out
Example #23
def activation_backward(x, y, gy, mode):
    x = cupy.ascontiguousarray(x)
    gy = cupy.ascontiguousarray(gy)

    gx = cupy.empty_like(x)
    dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=dtype).ctypes
    zero = numpy.array(0, dtype=dtype).ctypes
    handle = get_handle()
    y_mat = _as4darray(y)
    desc = create_tensor_descriptor(y_mat)
    cudnn.activationBackward_v3(handle, mode, one.data, desc.value, y.data.ptr,
                                desc.value, gy.data.ptr, desc.value,
                                x.data.ptr, zero.data, desc.value, gx.data.ptr)
    return gx
Example #24
    def find(self, data):
        ret = cp.ascontiguousarray(cp.zeros(data.shape[:-1], 'int32')) - 1
        data = cp.ascontiguousarray(data)
        loop_indices = cp.arange(data.size // self.dim).astype('int32')
        ok = cp.zeros((1, ), 'int32')
        chainer.cuda.elementwise(
            'int32 j, raw int32 data, raw int32 indices, raw int32 values, raw int32 ret, raw int32 ok',
            '',
            string.Template('''
                /* */
                int* value = &data[j * ${dim}];
                int bn = i / ${num_points};

                /* compute initial key */
                unsigned int key = 0;
                for (int k = 0; k < ${dim}; k++) key = (key + value[k]) * ${hash_factor};
                key = key % ${table_size};

                for (int l = 0; l < 100; l++) {
                    if (indices[bn * ${table_size} + key] < 0) {
                        ret[j] = -1;
                        break;
                    }
                    bool match = true;
                    for (int k = 0; k < ${dim}; k++)
                        if (values[(bn * ${table_size} + key) * ${dim} + k] != value[k])
                            match = false;
                    if (match) {
                        ret[j] = indices[bn * ${table_size} + key];
                        break;
                    } else {
                        key = (key + 1) % ${table_size};
                    }
                    if (l == 99) {
                        ok[0] = -1;
                    }
                }
            ''').substitute(
                table_size=self.table_size,
                hash_factor=self.hash_factor,
                num_points=data.shape[1],
                dim=self.dim,
            ),
            'function',
        )(loop_indices, data, self.indices, self.values, ret, ok)
        if int(ok[0]) < 0:
            raise Exception
        return ret
Example #25
    def forward_gpu(self, inputs):
        # data_in: [bs, nf, ...]
        # indices: [bs, is, is]
        # data_out: [bs, is, is, ..]
        data_in, indices = list(map(cp.ascontiguousarray, inputs))
        data_out = cp.ascontiguousarray(
            cp.zeros(tuple(list(indices.shape[:3]) + list(data_in.shape[2:])),
                     'float32'))
        chainer.cuda.elementwise(
            'raw float32 data_in, int32 index, raw float32 data_out',
            '',
            string.Template('''
                if (index < 0) return;

                int bn = i / (${image_size} * ${image_size});
                int pos_from = bn * ${num_features} * ${dim} + index * ${dim};
                int pos_to = i * ${dim};
                float* p1 = (float*)&data_in[pos_from];
                float* p2 = (float*)&data_out[pos_to];
                for (int j = 0; j < ${dim}; j++) *p2++ = *p1++;
            ''').substitute(
                image_size=indices.shape[1],
                num_features=data_in.shape[1],
                dim=functools.reduce(lambda x, y: x * y, data_in.shape[2:]),
            ),
            'function',
        )(
            data_in,
            indices,
            data_out,
        )
        return data_out,
Example #26
def _filter_input(image, prefilter, mode, cval, order):
    """Perform spline prefiltering when needed.

    Spline orders > 1 need a prefiltering stage to preserve resolution.

    For boundary modes without analytical spline boundary conditions, some
    prepadding of the input with cupy.pad is used to maintain accuracy.
    ``npad`` is an integer corresponding to the amount of padding at each edge
    of the array.
    """
    if not prefilter or order < 2:
        return (cupy.ascontiguousarray(image), 0)
    padded, npad = _prepad_for_spline_filter(image, mode, cval)
    float_dtype = cupy.promote_types(image.dtype, cupy.float32)
    filtered = spline_filter(padded, order, output=float_dtype, mode=mode)
    return cupy.ascontiguousarray(filtered), npad
Example #27
    def _estimate_pdf_multi(self, x_kj, w_i=None, get=True):
        if w_i is None:
            w_i = self.w_i
        if self.bootstrap_binning is None:
            n = self.t_ij.shape[0]
            h_j = (4 / 3 / n)**(1 / 5) * self.sigma_j
            if cp == np:
                return np.asarray([
                    KernelDensityPDF._kdpdf0(x_j, self.t_ij, h_j, self.w_i)
                    for x_j in x_kj
                ])
            else:
                x_kj = cp.asarray(x_kj)
                h_j = cp.ascontiguousarray(cp.asarray(h_j))
                pdf_k = cp.empty(x_kj.shape[0])
                block_size = 64
                grid_size = x_kj.shape[0] // block_size + 1
                KernelDensityPDF._kdpdf0_multi(
                    (grid_size, ), (block_size, ),
                    (x_kj, self.t_ij, h_j, w_i, self.t_ij.shape[0],
                     self.t_ij.shape[1], x_kj.shape[0], pdf_k))
                pdf_k = pdf_k / cp.sum(self.w_i)
                return pdf_k.get() if get else pdf_k
        else:
            # should do this on GPU...
            x_kj = cp.asnumpy(x_kj)
            from scipy.interpolate import RegularGridInterpolator
            interp = RegularGridInterpolator(cp.asnumpy(self.bin_centers),
                                             cp.asnumpy(self.counts),
                                             bounds_error=False,
                                             fill_value=None)
            pdf_k = cp.asarray(interp(x_kj))
            min_val = np.min(self.counts)
            pdf_k[pdf_k < min_val] = min_val
            return pdf_k
Example #28
def _as_contiguous(args):
    if isinstance(args, (list, tuple)):
        ret = []
        for arg in args:
            if arg is None:
                ret.append(None)
                continue
            if not arg.flags.c_contiguous:
                arg = cupy.ascontiguousarray(arg)
            ret.append(arg)
        return ret

    if not args.flags.c_contiguous:
        args = cupy.ascontiguousarray(args)

    return args
Example #29
def _call_kernel(kernel, input, weights, output, weight_dtype=cupy.float64):
    """
    Calls a constructed ElementwiseKernel. The kernel must take an input image,
    an array of weights, and an output array.

    The weights are the only optional part and can be passed as None and then
    one less argument is passed to the kernel. If the output is given as None
    then it will be allocated in this function.

    This function makes sure that the weights are contiguous and float64 or
    bool*, and that the output is allocated and appropriately shaped. It also
    handles the situation where the input and output arrays overlap in
    memory.

    * weights are always cast to float64 or bool in order to get an output
    compatible with SciPy, though float32 might be sufficient when the input
    dtype is low precision.
    """
    if weights is not None:
        weights = cupy.ascontiguousarray(weights, weight_dtype)

    needs_temp = cupy.shares_memory(output, input, "MAY_SHARE_BOUNDS")
    if needs_temp:
        output, temp = (
            _util._get_output(output.dtype, input, None, weight_dtype),
            output,
        )
    if weights is None:
        kernel(input, output)
    else:
        kernel(input, weights, output)
    if needs_temp:
        temp[...] = output[...]
        output = temp
    return output
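The temporary-copy dance at the end exists because an elementwise stencil that reads and writes the same buffer would see already-updated neighbors. A pure NumPy sketch of the aliasing hazard (illustrative only; the real kernels here are CUDA elementwise kernels):

import numpy as np

a = np.arange(5.0)
out = a                  # output aliases the input
for i in range(1, 5):    # a naive "shift right by one" stencil
    out[i] = a[i - 1]    # reads values the loop has already overwritten
print(out)               # [0. 0. 0. 0. 0.] instead of [0. 0. 1. 2. 3.]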
Example #30
def _correlate(in1, in2, mode='full', method='auto', convolution=False):
    quick_out = _st_core._check_conv_inputs(in1, in2, mode, convolution)
    if quick_out is not None:
        return quick_out
    if method not in ('auto', 'direct', 'fft'):
        raise ValueError('acceptable methods are "auto", "direct", or "fft"')

    if method == 'auto':
        method = choose_conv_method(in1, in2, mode=mode)

    if method == 'direct':
        return _st_core._direct_correlate(in1, in2, mode, in1.dtype,
                                          convolution)

    # if method == 'fft':
    inputs_swapped = _st_core._inputs_swap_needed(mode, in1.shape, in2.shape)
    if inputs_swapped:
        in1, in2 = in2, in1
    if not convolution:
        in2 = _st_core._reverse_and_conj(in2)
    out = fftconvolve(in1, in2, mode)
    result_type = cupy.result_type(in1, in2)
    if result_type.kind in 'ui':
        out = out.round()
    out = out.astype(result_type, copy=False)
    if not convolution and inputs_swapped:
        out = cupy.ascontiguousarray(_st_core._reverse_and_conj(out))
    return out
Example #31
def _check_size_footprint_structure(ndim,
                                    size,
                                    footprint,
                                    structure,
                                    stacklevel=3,
                                    force_footprint=False):
    if structure is None and footprint is None:
        if size is None:
            raise RuntimeError("no footprint or filter size provided")
        sizes = _util._fix_sequence_arg(size, ndim, 'size', int)
        if force_footprint:
            return None, cupy.ones(sizes, bool), None
        return sizes, None, None
    if size is not None:
        warnings.warn("ignoring size because {} is set".format(
            'structure' if footprint is None else 'footprint'),
                      UserWarning,
                      stacklevel=stacklevel + 1)

    if footprint is not None:
        footprint = cupy.array(footprint, bool, True, 'C')
        if not footprint.any():
            raise ValueError("all-zero footprint is not supported")

    if structure is None:
        if not force_footprint and footprint.all():
            if footprint.ndim != ndim:
                raise RuntimeError("size must have length equal to input rank")
            return footprint.shape, None, None
        return None, footprint, None

    structure = cupy.ascontiguousarray(structure)
    if footprint is None:
        footprint = cupy.ones(structure.shape, bool)
    return None, footprint, structure
Example #32
    def from_4Dcamera_file(filename):
        with h5py.File(filename, 'r') as f0:
            frames = f0['electron_events/frames'][:]
            scan_dimensions = (
                f0['electron_events/scan_positions'].attrs['Ny'],
                f0['electron_events/scan_positions'].attrs['Nx'])
            frame_dimensions = np.array((576, 576))

        def unragged_frames_size(frames):
            mm = 0
            for ev in frames:
                if ev.shape[0] > mm:
                    mm = ev.shape[0]
            return mm

        def make_unragged_frames(frames, scan_dimensions):
            unragged_frame_size = unragged_frames_size(frames.ravel())
            fr_full = cp.zeros((frames.ravel().shape[0], unragged_frame_size),
                               dtype=cp.int32)
            fr_full[:] = cp.iinfo(fr_full.dtype).max
            for ii, ev in enumerate(frames.ravel()):
                fr_full[ii, :ev.shape[0]] = cp.array(ev)
            fr_full_4d = fr_full.reshape((*scan_dimensions, fr_full.shape[1]))
            fr_full_4d = fr_full_4d[:, :-1, :]
            return fr_full_4d

        d = Sparse4DData()
        d.indices = cp.ascontiguousarray(
            make_unragged_frames(frames.ravel(), scan_dimensions))
        d.scan_dimensions = np.array(d.indices.shape[:2])
        d.frame_dimensions = frame_dimensions
        d.counts = cp.zeros(d.indices.shape, dtype=bool)  # cp.bool is removed in newer CuPy
        d.counts[d.indices != cp.iinfo(d.indices.dtype).max] = 1

        return d
Example #33
def activation_forward(x, mode):
    x = cupy.ascontiguousarray(x)
    y = cupy.empty_like(x)

    dtype = "d" if x.dtype == "d" else "f"
    one = numpy.array(1, dtype=dtype).ctypes
    zero = numpy.array(0, dtype=dtype).ctypes
    handle = get_handle()
    x_mat = _as4darray(x)
    desc = create_tensor_descriptor(x_mat)
    cudnn.activationForward_v3(handle, mode, one.data, desc.value,
                               x_mat.data.ptr, zero.data, desc.value,
                               y.data.ptr)
    return y
Example #34
def _call_nms_kernel(bbox, thresh):
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = _load_kernel('nms_kernel', _nms_gpu_code)
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec
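The uint64 mask the kernel writes packs one suppression bit per (box, block-of-64-boxes) pair, and the host-side post-processing walks boxes in score order, keeping each box that no kept box has marked. A sketch of that unpacking (a hypothetical helper illustrating the bit layout, not the actual _nms_gpu_post):

import numpy as np

def unpack_mask(mask, n_bbox, col_blocks):
    keep = []
    suppressed = np.zeros(col_blocks * 64, dtype=bool)
    for i in range(n_bbox):
        if suppressed[i]:
            continue
        keep.append(i)
        for j in range(col_blocks):          # mark everything box i overlaps
            bits = int(mask[i * col_blocks + j])
            for k in range(64):
                if bits >> k & 1:
                    suppressed[j * 64 + k] = True
    return keep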
Example #35
    def test_ascontiguousarray_on_contiguous_array(self):
        a = testing.shaped_arange((2, 3, 4))
        b = cupy.ascontiguousarray(a)
        self.assertIs(a, b)
Example #36
    def test_ascontiguousarray_on_noncontiguous_array(self):
        a = testing.shaped_arange((2, 3, 4))
        b = a.transpose(2, 0, 1)
        c = cupy.ascontiguousarray(b)
        self.assertTrue(c.flags.c_contiguous)
        testing.assert_array_equal(b, c)
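Taken together, the two tests pin down the contract of cupy.ascontiguousarray: a no-op (the very same object is returned) for C-contiguous input, and a fresh C-contiguous copy otherwise. A standalone sketch of the same checks outside the test harness (assumes any working CuPy install):

import cupy

a = cupy.arange(24).reshape(2, 3, 4)
assert cupy.ascontiguousarray(a) is a       # already contiguous: returned as-is
b = a.transpose(2, 0, 1)                    # a strided view, not C-contiguous
c = cupy.ascontiguousarray(b)
assert c is not b and c.flags.c_contiguous  # new contiguous copy
assert (c == b).all()                       # same values, new layout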
Example #37
    def __call__(self, loc, score,
                 anchor, img_size, scale=1.):
        """input should  be ndarray
        Propose RoIs.

        Inputs :obj:`loc, score, anchor` refer to the same anchor when indexed
        by the same index.

        On notations, :math:`R` is the total number of anchors. This is equal
        to product of the height and the width of an image and the number of
        anchor bases per pixel.

        The type of the output is the same as that of the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. :math:`S` is less than
            :obj:`self.n_test_post_nms` in test time and less than
            :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
            the size of the predicted bounding boxes and the number of
            bounding boxes discarded by NMS.

        """
        # NOTE: at test time, remember to call faster_rcnn.eval()
        # to set self.training to False
        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        # Convert anchors into proposals via bbox transformations.
        roi = loc2bbox(anchor, loc)

        # Clip predicted boxes to image.
        roi[:, slice(0, 4, 2)] = np.clip(
            roi[:, slice(0, 4, 2)], 0, img_size[0])
        roi[:, slice(1, 4, 2)] = np.clip(
            roi[:, slice(1, 4, 2)], 0, img_size[1])

        # Remove predicted boxes with either height or width < threshold.
        min_size = self.min_size * scale
        hs = roi[:, 2] - roi[:, 0]
        ws = roi[:, 3] - roi[:, 1]
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]

        # Sort all (proposal, score) pairs by score from highest to lowest.
        # Take top pre_nms_topN (e.g. 6000).
        order = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]

        # Apply nms (e.g. threshold = 0.7).
        # Take after_nms_topN (e.g. 300).

        # NOTE: something is wrong here!
        # TODO: remove cuda.to_gpu
        keep = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(roi)),
            thresh=self.nms_thresh)
        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        roi = roi[keep]
        return roi