# Returns an xp.ndarray of the given shape filled with N, ..., 1.
# Bools alternate instead, since casting nonzero values would make
# every element True.
def shaped_reverse_arange(shape, xp=cupy, dtype=numpy.float32):
    size = internal.prod(shape)
    a = numpy.arange(size, 0, -1)
    if numpy.dtype(dtype).type == numpy.bool_:
        return xp.array((a % 2 == 0).reshape(shape))
    else:
        return xp.array(a.astype(dtype).reshape(shape))

def shaped_arange(shape, xp=cupy, dtype=numpy.float32, order='C'):
    """Returns an array with given shape, array module, and dtype.

    Args:
        shape(tuple of int): Shape of returned ndarray.
        xp(numpy or cupy): Array module to use.
        dtype(dtype): Dtype of returned ndarray.
        order({'C', 'F'}): Order of returned ndarray.

    Returns:
        numpy.ndarray or cupy.ndarray:
            The array filled with :math:`1, \\cdots, N` with specified dtype
            with given shape, array module. Here, :math:`N` is the size of
            the returned array. If ``dtype`` is ``numpy.bool_``, evens
            (resp. odds) are converted to ``True`` (resp. ``False``). If
            ``dtype`` is a complex type, the array is filled with
            :math:`1 + 1j, \\cdots, N + Nj`.

    """
    dtype = numpy.dtype(dtype)
    a = numpy.arange(1, internal.prod(shape) + 1, 1)
    if dtype == '?':
        a = a % 2 == 0
    elif dtype.kind == 'c':
        a = a + a * 1j
    return xp.array(a.astype(dtype).reshape(shape), order=order)

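# Usage sketch (illustrative addition, not from the original source).
# With ``xp=numpy`` the helper is self-contained and runs without a GPU:
#
#     >>> shaped_arange((2, 3), numpy, numpy.int32)
#     array([[1, 2, 3],
#            [4, 5, 6]], dtype=int32)
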
def size(self):
    """Number of elements this array holds.

    This is equivalent to product over the shape tuple.

    .. seealso:: :attr:`numpy.ndarray.size`

    """
    return internal.prod(self._shape)

def take(a, indices, axis=None, out=None):
    """Takes elements of an array at specified indices along an axis.

    This is an implementation of "fancy indexing" at single axis.

    This function does not support ``mode`` option.

    Args:
        a (cupy.ndarray): Array to extract elements.
        indices (int or array-like): Indices of elements that this function
            takes.
        axis (int): The axis along which to select indices. The flattened
            input is used by default.
        out (cupy.ndarray): Output array. If provided, it should be of
            appropriate shape and dtype.

    Returns:
        cupy.ndarray: The result of fancy indexing.

    .. seealso:: :func:`numpy.take`

    """
    if axis is None:
        a = a.ravel()
        lshape = ()
        rshape = ()
    else:
        if axis >= a.ndim:
            raise ValueError('Axis overrun')
        lshape = a.shape[:axis]
        rshape = a.shape[axis + 1:]

    if numpy.isscalar(indices):
        a = cupy.rollaxis(a, axis)
        if out is None:
            return a[indices].copy()
        else:
            out[:] = a[indices]
            return out
    elif not isinstance(indices, cupy.ndarray):
        indices = cupy.array(indices, dtype=int)

    out_shape = lshape + indices.shape + rshape
    if out is None:
        out = cupy.empty(out_shape, dtype=a.dtype)
    else:
        if out.dtype != a.dtype:
            raise TypeError('Output dtype mismatch')
        if out.shape != out_shape:
            raise ValueError('Output shape mismatch')

    cdim = indices.size
    rdim = internal.prod(rshape)
    indices = cupy.reshape(
        indices, (1,) * len(lshape) + indices.shape + (1,) * len(rshape))
    return _take_kernel(a, indices, cdim, rdim, out)

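# Usage sketch (illustrative addition; assumes CuPy is installed and a
# CUDA device is available):
#
#     >>> a = cupy.arange(12).reshape(3, 4)
#     >>> take(a, [0, 2], axis=1)
#     array([[ 0,  2],
#            [ 4,  6],
#            [ 8, 10]])
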
def reshape(a, newshape):
    """Returns an array with new shape and same elements.

    It tries to return a view if possible, otherwise returns a copy.

    This function currently does not support ``order`` option.

    Args:
        a (cupy.ndarray): Array to be reshaped.
        newshape (int or tuple of ints): The new shape of the array to
            return. If it is an integer, then it is treated as a tuple of
            length one. It should be compatible with ``a.size``. One of the
            elements can be -1, which is automatically replaced with the
            appropriate value to make the shape compatible with ``a.size``.

    Returns:
        cupy.ndarray: A reshaped view of ``a`` if possible, otherwise a copy.

    .. seealso:: :func:`numpy.reshape`

    """
    # TODO(beam2d): Support ordering option
    if isinstance(newshape, collections.Sequence):
        newshape = tuple(newshape)
    else:
        newshape = newshape,

    shape = a.shape
    if newshape == shape:
        return a.view()

    size = a.size
    newshape = internal.infer_unknown_dimension(newshape, size)
    if newshape == shape:
        return a.view()

    if internal.prod(newshape) != size:
        raise RuntimeError('Total size mismatch on reshape')

    newstrides = internal.get_strides_for_nocopy_reshape(a, newshape)
    if newstrides is not None:
        newarray = a.view()
    else:
        newarray = a.copy()
        newstrides = internal.get_strides_for_nocopy_reshape(
            newarray, newshape)
    newarray._shape = newshape
    newarray._strides = newstrides
    if newarray._c_contiguous == 1:
        newarray._f_contiguous = int(
            not size or len(newshape) - newshape.count(1) <= 1)
    else:
        newarray._f_contiguous = -1
    return newarray

def reshape(a, newshape):
    """Returns an array with new shape and same elements.

    It tries to return a view if possible, otherwise returns a copy.

    This function currently does not support ``order`` option.

    Args:
        a (cupy.ndarray): Array to be reshaped.
        newshape (int or tuple of ints): The new shape of the array to
            return. If it is an integer, then it is treated as a tuple of
            length one. It should be compatible with ``a.size``. One of the
            elements can be -1, which is automatically replaced with the
            appropriate value to make the shape compatible with ``a.size``.

    Returns:
        cupy.ndarray: A reshaped view of ``a`` if possible, otherwise a copy.

    .. seealso:: :func:`numpy.reshape`

    """
    # TODO(beam2d): Support ordering option
    if isinstance(newshape, collections.Sequence):
        newshape = tuple(newshape)
    else:
        newshape = newshape,

    shape = a.shape
    if newshape == shape:
        return a.view()

    size = a.size
    newshape = internal.infer_unknown_dimension(newshape, size)
    if newshape == shape:
        return a.view()

    if internal.prod(newshape) != size:
        raise RuntimeError('Total size mismatch on reshape')

    newstrides = internal.get_strides_for_nocopy_reshape(a, newshape)
    if newstrides is not None:
        newarray = a.view()
    else:
        newarray = a.copy()
        newstrides = internal.get_strides_for_nocopy_reshape(
            newarray, newshape)
    newarray._shape = newshape
    newarray._strides = newstrides
    if newarray._c_contiguous == 1:
        newarray._f_contiguous = int(
            not size or len(newshape) - newshape.count(1) <= 1)
    else:
        newarray._f_contiguous = -1
    return newarray

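# Usage sketch (illustrative addition; assumes CuPy is installed):
#
#     >>> x = cupy.arange(6)
#     >>> y = reshape(x, (2, -1))   # -1 is inferred as 3
#     >>> y.shape
#     (2, 3)
#
# Since ``x`` is contiguous, ``y`` is a view; no data is copied.
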
def diagonal(a, offset=0, axis1=0, axis2=1):
    """Returns specified diagonals.

    This function extracts the diagonals along two specified axes. The other
    axes are not changed. This function returns a writable view of this
    array as NumPy 1.10 will do.

    Args:
        a (cupy.ndarray): Array from which the diagonals are taken.
        offset (int): Index of the diagonals. Zero indicates the main
            diagonals, a positive value upper diagonals, and a negative
            value lower diagonals.
        axis1 (int): The first axis to take diagonals from.
        axis2 (int): The second axis to take diagonals from.

    Returns:
        cupy.ndarray: A view of the diagonals of ``a``.

    .. seealso:: :func:`numpy.diagonal`

    """
    if axis1 < axis2:
        min_axis, max_axis = axis1, axis2
    else:
        min_axis, max_axis = axis2, axis1

    tr = list(six.moves.range(a.ndim))
    del tr[max_axis]
    del tr[min_axis]
    if offset >= 0:
        a = cupy.transpose(a, tr + [axis1, axis2])
    else:
        a = cupy.transpose(a, tr + [axis2, axis1])
        offset = -offset

    diag_size = max(0, min(a.shape[-2], a.shape[-1] - offset))
    ret_shape = a.shape[:-2] + (diag_size,)
    if diag_size == 0:
        return cupy.empty(ret_shape, dtype=a.dtype)

    a = a[..., :diag_size, offset:offset + diag_size]

    ret = a.view()
    ret._shape = a.shape[:-2] + (diag_size,)
    ret._strides = a.strides[:-2] + (a.strides[-1] + a.strides[-2],)
    ret._size = internal.prod(ret._shape)
    ret._c_contiguous = -1
    ret._f_contiguous = -1
    return ret

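# Usage sketch (illustrative addition; assumes CuPy is installed):
#
#     >>> x = cupy.arange(9).reshape(3, 3)
#     >>> diagonal(x)             # main diagonal
#     array([0, 4, 8])
#     >>> diagonal(x, offset=1)   # first upper diagonal
#     array([1, 5])
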
def __init__(self, *arrays):
    ndim = 0
    for array in arrays:
        if isinstance(array, cupy.ndarray):
            ndim = max(ndim, array.ndim)

    shape = [1] * ndim
    for array in arrays:
        if isinstance(array, cupy.ndarray):
            offset = len(shape) - array.ndim
            for i, dim in enumerate(array.shape):
                if dim != 1 and shape[i + offset] != dim:
                    if shape[i + offset] != 1:
                        raise RuntimeError('Broadcasting failed')
                    else:
                        shape[i + offset] = dim

    self.shape = tuple(shape)
    self.size = internal.prod(self.shape)
    self.nd = len(shape)

    broadcasted = []
    for array in arrays:
        if not isinstance(array, cupy.ndarray):
            broadcasted.append(array)
            continue
        if array.shape == self.shape:
            broadcasted.append(array)
            continue

        offset = self.nd - array.ndim
        strides = []
        for i, dim in enumerate(shape):
            if i < offset:
                # TODO(okuta) fix if `dim` == 1
                strides.append(0)
            elif array.shape[i - offset] != dim:
                strides.append(0)
            else:
                strides.append(array._strides[i - offset])

        view = array.view()
        view._shape = self.shape
        view._strides = tuple(strides)
        view._mark_dirty()
        broadcasted.append(view)

    self.values = broadcasted

def shaped_reverse_arange(shape, xp=cupy, dtype=numpy.float32):
    """Returns an array filled with decreasing numbers.

    Args:
        shape(tuple of int): Shape of returned ndarray.
        xp(numpy or cupy): Array module to use.
        dtype(dtype): Dtype of returned ndarray.

    Returns:
        numpy.ndarray or cupy.ndarray:
            The array filled with :math:`N, \\cdots, 1` with specified dtype
            with given shape, array module. Here, :math:`N` is the size of
            the returned array. If ``dtype`` is ``numpy.bool_``, evens
            (resp. odds) are converted to ``True`` (resp. ``False``).

    """
    size = internal.prod(shape)
    a = numpy.arange(size, 0, -1)
    if numpy.dtype(dtype).type == numpy.bool_:
        return xp.array((a % 2 == 0).reshape(shape))
    else:
        return xp.array(a.astype(dtype).reshape(shape))

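# Usage sketch (illustrative addition): with ``xp=numpy`` this runs
# without a GPU:
#
#     >>> shaped_reverse_arange((2, 3), numpy, numpy.int32)
#     array([[6, 5, 4],
#            [3, 2, 1]], dtype=int32)
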
def __init__(self, *arrays):
    ndarray = cupy.ndarray
    rev = slice(None, None, -1)
    shape_arr = [a._shape[rev] for a in arrays
                 if isinstance(a, ndarray)]
    r_shape = [max(ss) for ss in zip_longest(*shape_arr, fillvalue=0)]

    self.shape = shape = tuple(r_shape[rev])
    self.size = size = internal.prod(shape)
    self.nd = ndim = len(shape)

    broadcasted = list(arrays)
    for i, a in enumerate(broadcasted):
        if not isinstance(a, ndarray):
            continue

        a_shape = a.shape
        if a_shape == shape:
            continue

        r_strides = [
            a_st if sh == a_sh else (0 if a_sh == 1 else None)
            for sh, a_sh, a_st
            in six_zip(r_shape, a._shape[rev], a._strides[rev])]

        if None in r_strides:
            raise RuntimeError('Broadcasting failed')

        offset = (0,) * (ndim - len(r_strides))

        broadcasted[i] = view = a.view()
        view._shape = shape
        view._strides = offset + tuple(r_strides[rev])
        view._size = size
        view._c_contiguous = -1
        view._f_contiguous = -1

    self.values = tuple(broadcasted)

def __init__(self, *arrays):
    ndarray = cupy.ndarray
    rev = slice(None, None, -1)
    shape_arr = [a._shape[rev] for a in arrays
                 if isinstance(a, ndarray)]
    r_shape = [max(ss) for ss in zip_longest(*shape_arr, fillvalue=0)]

    self.shape = shape = tuple(r_shape[rev])
    self.size = size = internal.prod(shape)
    self.nd = ndim = len(shape)

    broadcasted = list(arrays)
    for i, a in enumerate(broadcasted):
        if not isinstance(a, ndarray):
            continue

        a_shape = a.shape
        if a_shape == shape:
            continue

        r_strides = [
            a_st if sh == a_sh else (0 if a_sh == 1 else None)
            for sh, a_sh, a_st
            in six_zip(r_shape, a._shape[rev], a._strides[rev])]

        if None in r_strides:
            raise ValueError('Broadcasting failed')

        offset = (0,) * (ndim - len(r_strides))

        broadcasted[i] = view = a.view()
        view._shape = shape
        view._strides = offset + tuple(r_strides[rev])
        view._size = size
        view._c_contiguous = -1
        view._f_contiguous = -1

    self.values = tuple(broadcasted)

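# Usage sketch (illustrative addition). Assuming this constructor belongs
# to a ``broadcast`` class mirroring ``numpy.broadcast``, and that CuPy is
# installed:
#
#     >>> b = broadcast(cupy.empty((3, 1)), cupy.empty((1, 4)))
#     >>> b.shape
#     (3, 4)
#     >>> [v.shape for v in b.values]   # zero-stride views, no copies
#     [(3, 4), (3, 4)]
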
def __init__(self, shape):
    self.shape = shape
    self.size = internal.prod(shape)

def tensordot(a, b, axes=2):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to compute dot product along the specified axes which
    are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then ``axes`` axes at the last of ``a``
              and the first of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    .. seealso:: :func:`numpy.tensordot`

    """
    a_ndim = a.ndim
    b_ndim = b.ndim
    if a_ndim == 0 or b_ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b)

    if isinstance(axes, collections.Sequence):
        if len(axes) != 2:
            raise ValueError('Axes must consist of two arrays.')
        a_axes, b_axes = axes
        if numpy.isscalar(a_axes):
            a_axes = a_axes,
        if numpy.isscalar(b_axes):
            b_axes = b_axes,
    else:
        a_axes = tuple(six.moves.range(a_ndim - axes, a_ndim))
        b_axes = tuple(six.moves.range(axes))

    sum_ndim = len(a_axes)
    if sum_ndim != len(b_axes):
        raise ValueError('Axes length mismatch')

    for a_axis, b_axis in zip(a_axes, b_axes):
        if a.shape[a_axis] != b.shape[b_axis]:
            raise ValueError('Axis dimension mismatch')

    # Make the axes non-negative
    a = _move_axes_to_head(a, [axis % a_ndim for axis in a_axes])
    b = _move_axes_to_head(b, [axis % b_ndim for axis in b_axes])

    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]
    k = internal.prod(a.shape[:sum_ndim])
    n = a.size // k
    m = b.size // k
    return core.tensordot_core(a, b, None, n, m, k, ret_shape)

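# Usage sketch (illustrative addition; assumes CuPy is installed). Axis 1
# of ``a`` is paired with axis 0 of ``b``, and axis 0 of ``a`` with
# axis 1 of ``b``:
#
#     >>> a = cupy.arange(60.).reshape(3, 4, 5)
#     >>> b = cupy.arange(24.).reshape(4, 3, 2)
#     >>> tensordot(a, b, axes=([1, 0], [0, 1])).shape
#     (5, 2)
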
def tensordot(a, b, axes=2, out=None):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to compute dot product along the specified axes which
    are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then ``axes`` axes at the last of ``a``
              and the first of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.
        out (cupy.ndarray): Output array.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    .. seealso:: :func:`numpy.tensordot`

    """
    if a.ndim == 0 or b.ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b, out=out)

    ret_dtype = numpy.find_common_type([a.dtype, b.dtype], [])

    # Cast to float32 or float64
    dtype = numpy.find_common_type([a.dtype, b.dtype, 'f'], [])
    a = a.astype(dtype, copy=False)
    b = b.astype(dtype, copy=False)

    if a.dtype.type == numpy.float32:
        dot = cublas.sdot
        gemv = cublas.sgemv
        ger = cublas.sger
        gemm = cublas.sgemm
    elif a.dtype.type == numpy.float64:
        dot = cublas.ddot
        gemv = cublas.dgemv
        ger = cublas.dger
        gemm = cublas.dgemm

    if numpy.isscalar(axes):
        axes = [list(six.moves.range(a.ndim - axes, a.ndim)),
                list(six.moves.range(axes))]
    else:
        axes = list(axes)
        if numpy.isscalar(axes[0]):
            axes[0] = (axes[0],)
        if numpy.isscalar(axes[1]):
            axes[1] = (axes[1],)

    if len(axes) != 2:
        raise ValueError('Axes must consist of two arrays.')
    if len(axes[0]) != len(axes[1]):
        raise ValueError('Axes length mismatch')
    for a_axis, b_axis in zip(*axes):
        if not (-a.ndim <= a_axis < a.ndim and
                -b.ndim <= b_axis < b.ndim):
            raise IndexError('Axis overrun')
        if a.shape[a_axis] != b.shape[b_axis]:
            raise ValueError('Axis dimension mismatch')

    # Make the axes non-negative
    axes = (tuple(axis % a.ndim for axis in axes[0]),
            tuple(axis % b.ndim for axis in axes[1]))

    sum_ndim = len(axes[0])
    a = _move_axes_to_head(a, axes[0])
    b = _move_axes_to_head(b, axes[1])

    m = internal.prod(b.shape[sum_ndim:])
    n = internal.prod(a.shape[sum_ndim:])
    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]

    if out is not None:
        if out.size != internal.prod(ret_shape):
            raise ValueError('Output array has an invalid size')
        if not out.flags.c_contiguous:
            raise ValueError('Output array must be C-contiguous')

    if 0 in a.shape or 0 in b.shape:
        if 0 not in a.shape or 0 not in b.shape:
            raise ValueError('cannot dot zero-sized and non-zero-sized arrays')
        if out is None:
            return cupy.zeros(ret_shape, dtype=ret_dtype)
        else:
            out.fill(0)
            return out

    if out is None:
        out = cupy.empty(ret_shape, dtype=dtype)
        if dtype == ret_dtype:
            ret = out
        else:
            ret = cupy.empty(ret_shape, dtype=ret_dtype)
    else:
        ret = out
        if out.dtype != dtype:
            out = cupy.empty(ret_shape, dtype=dtype)

    k = a.size // n

    # It copies the operands if needed
    a = a.reshape(k, n)
    b = b.reshape(k, m)
    c = out.view()
    c.shape = (n, m)

    # Be careful that cuBLAS uses the FORTRAN-order matrix representation.
    handle = cuda.Device().cublas_handle
    if k == 1:
        if n == 1 or m == 1:
            # Scalar-vector product
            cupy.multiply(a.T, b, c)
        else:
            # Outer product A^T * B
            # c is C-contiguous while cuBLAS requires F-contiguous arrays,
            # so we compute C^T = B^T * A here.
            c.fill(0)
            a, inca = _to_cublas_vector(a, 1)
            b, incb = _to_cublas_vector(b, 1)
            ger(handle, m, n, 1, b._fptr, incb, a._fptr, inca, c._fptr, m)
    elif n == 1:
        if m == 1:
            # Inner product
            a, inca = _to_cublas_vector(a, 0)
            b, incb = _to_cublas_vector(b, 0)
            mode = cublas.getPointerMode(handle)
            cublas.setPointerMode(handle,
                                  cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dot(handle, k, a._fptr, inca, b._fptr, incb, c._fptr)
            finally:
                cublas.setPointerMode(handle, mode)
        else:
            # Matrix-vector product B^T * A
            a, inca = _to_cublas_vector(a, 1)
            b, transb, ldb = _mat_to_cublas_contiguous(b.T)
            if transb:
                # gemv requires (m, k) as the original matrix dimensions
                # rather than the transposed dimensions.
                m, k = k, m
            gemv(handle, transb, m, k, 1, b._fptr, ldb, a._fptr, inca,
                 0, c._fptr, 1)
    elif m == 1:
        # Matrix-vector product A^T * B
        a, transa, lda = _mat_to_cublas_contiguous(a.T)
        b, incb = _to_cublas_vector(b, 1)
        if not transa:
            # gemv requires (n, k) as the original matrix dimensions rather
            # than the transposed dimensions.
            n, k = k, n
        gemv(handle, transa, n, k, 1, a._fptr, lda, b._fptr, incb, 0,
             c._fptr, 1)
    else:
        # Matrix-Matrix product A^T * B
        # c is C-contiguous while cuBLAS assumes F-contiguous inputs, so we
        # compute C^T = B^T * A here.
        a, transa, lda = _mat_to_cublas_contiguous(a)
        b, transb, ldb = _mat_to_cublas_contiguous(b.T)
        gemm(handle, transb, transa, m, n, k, 1, b._fptr, ldb,
             a._fptr, lda, 0, c._fptr, m)

    if dtype != ret_dtype:
        elementwise.copy(out, ret)
    return ret

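# Sketch of the GEMM mapping used above (illustrative addition, verifiable
# with NumPy alone): after the contracted axes are moved to the front,
# ``a`` is viewed as a (k, n) matrix and ``b`` as (k, m), and the result
# is the (n, m) product A^T * B.
#
#     >>> import numpy
#     >>> a = numpy.arange(60.).reshape(3, 4, 5)    # contract last 2 axes
#     >>> b = numpy.arange(120.).reshape(4, 5, 6)   # with b's first 2 axes
#     >>> at = a.transpose(1, 2, 0).reshape(20, 3)  # (k, n), k = 4 * 5
#     >>> bm = b.reshape(20, 6)                     # (k, m)
#     >>> numpy.allclose(at.T.dot(bm), numpy.tensordot(a, b, axes=2))
#     True
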
def __getitem__(self, slices):
    # It supports the basic indexing (by slices, ints or Ellipsis) only.
    # TODO(beam2d): Support the advanced indexing of NumPy.
    if not isinstance(slices, tuple):
        slices = [slices]
    else:
        slices = list(slices)

    if six.moves.builtins.any(isinstance(s, ndarray) for s in slices):
        raise ValueError('Advanced indexing is not supported')

    # Expand ellipsis into empty slices
    n_newaxes = slices.count(newaxis)
    n_ellipses = slices.count(Ellipsis)
    if n_ellipses > 0:
        if n_ellipses > 1:
            raise ValueError('Only one Ellipsis is allowed in index')
        ellipsis = slices.index(Ellipsis)
        ellipsis_size = self.ndim - (len(slices) - n_newaxes - 1)
        slices[ellipsis:ellipsis + 1] = [slice(None)] * ellipsis_size

    slices += [slice(None)] * (self.ndim - len(slices) + n_newaxes)

    # Create new shape and stride
    shape = []
    strides = []

    j = 0
    offset = 0
    for i, s in enumerate(slices):
        if s is newaxis:
            shape.append(1)
            if j < self.ndim:
                strides.append(self._strides[j])
            elif self.ndim > 0:
                strides.append(self._strides[-1])
            else:
                strides.append(self.itemsize)
        elif isinstance(s, slice):
            s = internal.complete_slice(s, self._shape[j])
            if s.step > 0:
                dim = (s.stop - s.start - 1) // s.step + 1
            else:
                dim = (s.stop - s.start + 1) // s.step + 1

            shape.append(dim)
            strides.append(self._strides[j] * s.step)

            offset += s.start * self._strides[j]
            j += 1
        elif numpy.isscalar(s):
            s = int(s)
            if s >= self._shape[j]:
                raise IndexError('Index %s exceeds the size %s at axis %s'
                                 % (s, self._shape[j], j))
            offset += s * self._strides[j]
            j += 1
        else:
            raise TypeError('Invalid index type: %s' % type(slices[i]))

    v = self.view()
    v._shape = tuple(shape)
    v._strides = tuple(strides)
    v._size = internal.prod(shape)
    v.data = self.data + offset
    v._c_contiguous = -1
    v._f_contiguous = -1

    return v

# Returns an xp.ndarray of the given shape filled with N, ..., 1.
def shaped_reverse_arange(shape, xp=cupy, dtype=numpy.float32):
    size = internal.prod(shape)
    a = numpy.arange(size, 0, -1)
    return xp.array(a.astype(dtype).reshape(shape))

# Returns an xp.ndarray of the given shape filled with 1, ..., N.
# Bools alternate instead, since casting nonzero values would make
# every element True.
def shaped_arange(shape, xp=cupy, dtype=numpy.float32):
    a = numpy.arange(1, internal.prod(shape) + 1, 1)
    if numpy.dtype(dtype).type == numpy.bool_:
        return xp.array((a % 2 == 0).reshape(shape))
    else:
        return xp.array(a.astype(dtype).reshape(shape))

def __getitem__(self, slices):
    # It supports the basic indexing (by slices, ints or Ellipsis) only.
    # TODO(beam2d): Support the advanced indexing of NumPy.
    if not isinstance(slices, tuple):
        slices = [slices]
    else:
        slices = list(slices)

    if any(isinstance(s, ndarray) for s in slices):
        raise ValueError('Advanced indexing is not supported')

    # Expand ellipsis into empty slices
    n_newaxes = slices.count(newaxis)
    n_ellipses = slices.count(Ellipsis)
    if n_ellipses > 0:
        if n_ellipses > 1:
            raise ValueError('Only one Ellipsis is allowed in index')
        ellipsis = slices.index(Ellipsis)
        ellipsis_size = self.ndim - (len(slices) - n_newaxes - 1)
        slices[ellipsis:ellipsis + 1] = [slice(None)] * ellipsis_size

    slices += [slice(None)] * (self.ndim - len(slices) + n_newaxes)

    # Create new shape and stride
    shape = []
    strides = []

    j = 0
    offset = 0
    for i, s in enumerate(slices):
        if s is newaxis:
            shape.append(1)
            if j < self.ndim:
                strides.append(self._strides[j])
            elif self.ndim > 0:
                strides.append(self._strides[-1])
            else:
                strides.append(self.itemsize)
        elif isinstance(s, slice):
            s = internal.complete_slice(s, self._shape[j])
            if s.step > 0:
                dim = (s.stop - s.start - 1) // s.step + 1
            else:
                dim = (s.stop - s.start + 1) // s.step + 1

            shape.append(dim)
            strides.append(self._strides[j] * s.step)

            offset += s.start * self._strides[j]
            j += 1
        elif numpy.isscalar(s):
            s = int(s)
            if s >= self._shape[j]:
                raise IndexError(
                    'Index %s exceeds the size %s at axis %s'
                    % (s, self._shape[j], j))
            offset += s * self._strides[j]
            j += 1
        else:
            raise TypeError('Invalid index type: %s' % type(slices[i]))

    v = self.view()
    v._shape = tuple(shape)
    v._strides = tuple(strides)
    v._size = internal.prod(shape)
    v.data = self.data + offset
    v._c_contiguous = -1
    v._f_contiguous = -1

    return v

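# Basic-indexing sketch (illustrative addition; assumes CuPy is
# installed). Slices, integers, Ellipsis and ``newaxis`` are supported;
# array indices raise ``ValueError`` as noted above:
#
#     >>> x = cupy.arange(24).reshape(2, 3, 4)
#     >>> x[1, ::2].shape      # int plus stepped slice
#     (2, 4)
#     >>> x[..., 0].shape      # Ellipsis expands to full slices
#     (2, 3)
#     >>> x[:, None].shape     # newaxis inserts a unit dimension
#     (2, 1, 3, 4)
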
def tensordot(a, b, axes=2):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to compute dot product along the specified axes which
    are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then ``axes`` axes at the last of ``a``
              and the first of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    .. seealso:: :func:`numpy.tensordot`

    """
    a_ndim = a.ndim
    b_ndim = b.ndim
    if a_ndim == 0 or b_ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b)

    if isinstance(axes, collections.Sequence):
        if len(axes) != 2:
            raise ValueError('Axes must consist of two arrays.')
        a_axes, b_axes = axes
        if numpy.isscalar(a_axes):
            a_axes = a_axes,
        if numpy.isscalar(b_axes):
            b_axes = b_axes,
    else:
        a_axes = tuple(six.moves.range(a_ndim - axes, a_ndim))
        b_axes = tuple(six.moves.range(axes))

    sum_ndim = len(a_axes)
    if sum_ndim != len(b_axes):
        raise ValueError('Axes length mismatch')

    for a_axis, b_axis in zip(a_axes, b_axes):
        if a.shape[a_axis] != b.shape[b_axis]:
            raise ValueError('Axis dimension mismatch')

    # Make the axes non-negative
    a = _move_axes_to_head(a, [axis % a_ndim for axis in a_axes])
    b = _move_axes_to_head(b, [axis % b_ndim for axis in b_axes])

    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]
    k = internal.prod(a.shape[:sum_ndim])
    n = a.size // k
    m = b.size // k
    return _tensordot_core(a, b, None, n, m, k, ret_shape)