def na_op(x, y):
    try:
        result = expressions.evaluate(
            op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
    except TypeError:
        xrav = x.ravel()
        if isinstance(y, (np.ndarray, pd.Series)):
            dtype = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=dtype)
            yrav = y.ravel()
            mask = notnull(xrav) & notnull(yrav)
            xrav = xrav[mask]
            yrav = yrav[mask]
            if np.prod(xrav.shape) and np.prod(yrav.shape):
                result[mask] = op(xrav, yrav)
        elif hasattr(x, 'size'):
            result = np.empty(x.size, dtype=x.dtype)
            mask = notnull(xrav)
            xrav = xrav[mask]
            if np.prod(xrav.shape):
                result[mask] = op(xrav, y)
        else:
            raise TypeError("cannot perform operation {op} between objects "
                            "of type {x} and {y}"
                            .format(op=name, x=type(x), y=type(y)))

        result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan)
        result = result.reshape(x.shape)

    result = com._fill_zeros(result, x, y, name, fill_zeros)
    return result

def __init__(self, dfs, column_name):
    self.dfs = dfs
    self.column_name = column_name
    dtypes = [df.dtype(column_name) for df in dfs]
    self.is_masked = any([df.is_masked(column_name) for df in dfs])
    if self.is_masked:
        self.fill_value = dfs[0].columns[self.column_name].fill_value
    # np.datetime64 and find_common_type don't mix very well
    any_strings = any([dtype == str_type for dtype in dtypes])
    if any_strings:
        self.dtype = str_type
    else:
        if all([dtype.type == np.datetime64 for dtype in dtypes]):
            self.dtype = dtypes[0]
        else:
            if all([dtype == dtypes[0] for dtype in dtypes]):  # find common types doesn't always behave well
                self.dtype = dtypes[0]
            if any([dtype.kind in 'SU' for dtype in dtypes]):  # strings are also done manually
                if all([dtype.kind in 'SU' for dtype in dtypes]):
                    index = np.argmax([dtype.itemsize for dtype in dtypes])
                    self.dtype = dtypes[index]
                else:
                    index = np.argmax([df.columns[self.column_name].astype('O').astype('U').dtype.itemsize for df in dfs])
                    self.dtype = dfs[index].columns[self.column_name].astype('O').astype('U').dtype
            else:
                self.dtype = np.find_common_type(dtypes, [])
            logger.debug("common type for %r is %r", dtypes, self.dtype)
    self.shape = (len(self), ) + self.dfs[0].evaluate(self.column_name, i1=0, i2=1).shape[1:]
    for i in range(1, len(dfs)):
        shape_i = (len(self), ) + self.dfs[i].evaluate(self.column_name, i1=0, i2=1).shape[1:]
        if self.shape != shape_i:
            raise ValueError("shape of column %s, array index 0, is %r and is incompatible with the shape of the same column of array index %d, %r" % (self.column_name, self.shape, i, shape_i))

def _common_dtype(x, y):
    """Determines common numpy DTYPE for arrays."""
    dtype = np.find_common_type([x.dtype, y.dtype], [])
    if x.dtype != dtype:
        x = x.astype(dtype)
    if y.dtype != dtype:
        y = y.astype(dtype)
    return x, y

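# A minimal usage sketch for the _common_dtype helper above (the input
# arrays are made up for illustration): int32 promoted against float64
# yields float64, and both returned arrays share that dtype.
import numpy as np

_x = np.arange(3, dtype=np.int32)
_y = np.linspace(0.0, 1.0, 3)  # float64
_x2, _y2 = _common_dtype(_x, _y)
assert _x2.dtype == _y2.dtype == np.dtype('float64')
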
def _bmat(blocks, dtypes):
    from pytools import single_valued
    from pytential.symbolic.matrix import is_zero

    nrows = blocks.shape[0]
    ncolumns = blocks.shape[1]

    # "block row starts"/"block column starts"
    brs = np.cumsum([0]
            + [single_valued(blocks[ibrow, ibcol].shape[0]
                             for ibcol in range(ncolumns)
                             if not is_zero(blocks[ibrow, ibcol]))
               for ibrow in range(nrows)])

    bcs = np.cumsum([0]
            + [single_valued(blocks[ibrow, ibcol].shape[1]
                             for ibrow in range(nrows)
                             if not is_zero(blocks[ibrow, ibcol]))
               for ibcol in range(ncolumns)])

    result = np.zeros((brs[-1], bcs[-1]),
                      dtype=np.find_common_type(dtypes, []))
    for ibcol in range(ncolumns):
        for ibrow in range(nrows):
            result[brs[ibrow]:brs[ibrow + 1], bcs[ibcol]:bcs[ibcol + 1]] = \
                    blocks[ibrow, ibcol]

    return result

def __imul__(self, other):
    '''
    Overloaded in-place multiplication (*=) operator, which supports
    multiplying the optstr by a scalar in place.
    '''
    self[0] *= other
    self.dtype = np.find_common_type([self.dtype, np.asarray(other).dtype], [])
    return self

def upcast(*args):
    """Returns the nearest supported sparse dtype for the
    combination of one or more types.

    upcast(t0, t1, ..., tn) -> T  where T is a supported dtype

    Examples
    --------
    >>> upcast('int32')
    <type 'numpy.int32'>
    >>> upcast('bool')
    <type 'numpy.int8'>
    >>> upcast('int32','float32')
    <type 'numpy.float64'>
    >>> upcast('bool',complex,float)
    <type 'numpy.complex128'>

    """
    t = _upcast_memo.get(hash(args))
    if t is not None:
        return t

    upcast = np.find_common_type(args, [])

    for t in supported_dtypes:
        if np.can_cast(upcast, t):
            _upcast_memo[hash(args)] = t
            return t

    raise TypeError('no supported conversion for types: %s' % (args,))

def get_columns(row_indices, src_cols, n_rows):
    if not len(src_cols):
        return np.zeros((n_rows, 0), dtype=source.X.dtype)

    n_src_attrs = len(source.domain.attributes)
    if all(isinstance(x, int) and 0 <= x < n_src_attrs for x in src_cols):
        return source.X[row_indices, src_cols]
    if all(isinstance(x, int) and x < 0 for x in src_cols):
        return source.metas[row_indices, [-1 - x for x in src_cols]]
    if all(isinstance(x, int) and x >= n_src_attrs for x in src_cols):
        return source.Y[row_indices, [x - n_src_attrs for x in src_cols]]

    types = []
    if any(isinstance(x, int) and 0 <= x < n_src_attrs for x in src_cols):
        types.append(source.X.dtype)
    if any(isinstance(x, int) and x < 0 for x in src_cols):
        types.append(source.metas.dtype)
    if any(isinstance(x, int) and x >= n_src_attrs for x in src_cols):
        types.append(source.Y.dtype)
    new_type = np.find_common_type(types, [])
    a = np.empty((n_rows, len(src_cols)), dtype=new_type)
    for i, col in enumerate(src_cols):
        if not isinstance(col, int):
            a[:, i] = col(source)
        elif col < 0:
            a[:, i] = source.metas[row_indices, -1 - col]
        elif col < n_src_attrs:
            a[:, i] = source.X[row_indices, col]
        else:
            a[:, i] = source.Y[row_indices, col - n_src_attrs]
    return a

def inverse_transform(self, X):
    """Convert the data back to the original representation.

    Parameters
    ----------
    X : array-like or sparse matrix, shape [n_samples, n_encoded_features]
        The transformed data.

    Returns
    -------
    X_tr : array-like, shape [n_samples, n_features]
        Inverse transformed array.

    """
    check_is_fitted(self, 'categories_')
    X = check_array(X, accept_sparse='csr')

    n_samples, _ = X.shape
    n_features = len(self.categories_)

    # validate shape of passed X
    msg = ("Shape of the passed X data is not correct. Expected {0} "
           "columns, got {1}.")
    if X.shape[1] != n_features:
        raise ValueError(msg.format(n_features, X.shape[1]))

    # create resulting array of appropriate dtype
    dt = np.find_common_type([cat.dtype for cat in self.categories_], [])
    X_tr = np.empty((n_samples, n_features), dtype=dt)

    for i in range(n_features):
        labels = X[:, i].astype('int64')
        X_tr[:, i] = self.categories_[i][labels]

    return X_tr

def common_dtype(vars):
# {{{
    import numpy as np
    from pygeode.var import Var

    # Can work on PyGeode variables, numpy arrays, lists, tuples, or scalars
    dtypes = []
    for v in vars:
        if isinstance(v, (Var, np.ndarray)):
            dtypes.append(v.dtype)
        elif isinstance(v, (list, tuple)):
            dtypes.append(np.asarray(v).dtype)
        else:
            dtypes.append(np.asarray([v]).dtype)

    # Unfortunately, find_common_type is not available in older versions of numpy :(
    try:
        return np.find_common_type(dtypes, [])
    except AttributeError:
        from warnings import warn
        warn("numpy.find_common_type not supported in this version of numpy. "
             "Using an alternative method.")
        # Create some empty arrays of the given types, and see what happens
        # when we combine them together
        arrays = [np.empty(0, dtype=d) for d in dtypes]
        return sum(arrays, arrays[0]).dtype

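# A standalone sketch of the fallback trick used above, independent of
# pygeode: adding zero-length arrays never touches data, but the result
# carries the promoted dtype (e.g. int16 + float32 -> float32).
import numpy as np

_dtypes = [np.dtype('int16'), np.dtype('float32')]
_arrays = [np.empty(0, dtype=d) for d in _dtypes]
assert sum(_arrays, _arrays[0]).dtype == np.dtype('float32')
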
def _get_dtype(operators, dtypes=None):
    if dtypes is None:
        dtypes = []
    for obj in operators:
        if obj is not None and hasattr(obj, 'dtype'):
            dtypes.append(obj.dtype)
    return np.find_common_type(dtypes, [])

def na_op(x, y):
    try:
        result = expressions.evaluate(
            op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
    except TypeError:
        xrav = x.ravel()
        if isinstance(y, (np.ndarray, pd.Series)):
            dtype = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=dtype)
            yrav = y.ravel()
            mask = notnull(xrav) & notnull(yrav)
            xrav = xrav[mask]
            yrav = yrav[mask]
            if np.prod(xrav.shape) and np.prod(yrav.shape):
                result[mask] = op(xrav, yrav)
        else:
            result = np.empty(x.size, dtype=x.dtype)
            mask = notnull(xrav)
            xrav = xrav[mask]
            if np.prod(xrav.shape):
                result[mask] = op(xrav, y)

        result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan)
        result = result.reshape(x.shape)

    result = com._fill_zeros(result, x, y, name, fill_zeros)
    return result

def fromoperator(operator, degfres, layer=0):
    '''
    Constructor, which converts an operator to an optstr.

    Parameters
    ----------
    operator : SOperator/FockOperator
        The operator to be converted to an optstr.
    degfres : DegFreTree
        The degfretree of the system.
    layer : int/tuple-of-str, optional
        The layer where the converted optstr lives.

    Returns
    -------
    OptStr
        The corresponding OptStr.
    '''
    assert isinstance(operator, SOperator) or isinstance(operator, FockOperator)
    layer = degfres.layers[layer] if type(layer) is int else layer
    table, sites = degfres.table(degfres.layers[-1]), degfres.labels('S', degfres.layers[-1])
    operator = operator if isinstance(operator, SOperator) else JWBosonization(operator, table)
    opts = []
    permutation = sorted(range(len(operator.indices)), key=lambda k: table[operator.indices[k]])
    for i, k in enumerate(permutation):
        index, matrix = operator.indices[k], operator.spins[k]
        opts.append(Opt(sites[table[index]], {matrix.tag: [operator.value if i == 0 else 1.0, np.asarray(matrix)]}))
    return OptStr(opts, np.find_common_type([np.asarray(operator.value).dtype] + [matrix.dtype for matrix in operator.spins], [])).relayer(degfres, layer)

def common_type(arrays):
    """
    Returns a type which is common to the input arrays.
    All input arrays can be safely cast to the returned dtype without
    loss of information.

    Notes
    -----
    If the list of arrays mixes 'numeric' and 'string' types, the function
    returns 'object' as the common type.
    """
    arrays = [np.asarray(a) for a in arrays]
    dtypes = [a.dtype for a in arrays]
    meta_kinds = [_meta_kind.get(dt.kind, 'other') for dt in dtypes]

    # mixing string and numeric => object
    if any(mk != meta_kinds[0] for mk in meta_kinds[1:]):
        return object
    elif meta_kinds[0] == 'numeric':
        return np.find_common_type(dtypes, [])
    elif meta_kinds[0] == 'str':
        need_unicode = any(dt.kind == 'U' for dt in dtypes)
        # unicode characters are coded with 4 bytes
        max_size = max(dt.itemsize // 4 if dt.kind == 'U' else dt.itemsize
                       for dt in dtypes)
        return np.dtype(('U' if need_unicode else 'S', max_size))
    else:
        return object

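# A small sketch of common_type (above); it assumes _meta_kind maps the
# numeric kinds ('b', 'i', 'u', 'f', 'c') to 'numeric' and 'S'/'U' to 'str'
# as in the surrounding module.
import numpy as np

assert common_type([np.array([1, 2]), np.array([0.5])]) == np.dtype('float64')
assert common_type([np.array(['ab']), np.array([1.0])]) is object  # mixed kinds
assert common_type([np.array(['ab']), np.array(['abcd'])]) == np.dtype('U4')
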
def _find_common_type(types):
    """Find a common data type among the given dtypes."""
    # TODO: enable using pandas-specific types
    if any(isinstance(t, ExtensionDtype) for t in types):
        raise TypeError("Common type discovery is currently only "
                        "supported for pure numpy dtypes.")
    return np.find_common_type(types, [])

def _filled(self, data, wcs=None, fill=np.nan, view=()):
    """
    Replace the excluded elements of *data* with *fill*.

    Parameters
    ----------
    data : array-like
        Input array
    fill : number
        Replacement value
    view : tuple, optional
        Any slicing to apply to the data before flattening

    Returns
    -------
    filled_array : `~numpy.ndarray`
        A 1-D ndarray containing the filled output

    Notes
    -----
    This is an internal method used by :class:`SpectralCube`.
    Users should use the property :meth:`MaskBase.filled_data`
    """
    # Must convert to floating point, but should not change from inherited
    # type otherwise
    dt = np.find_common_type([data.dtype], [np.float])
    sliced_data = data[view].astype(dt)
    ex = self.exclude(data=data, wcs=wcs, view=view)
    sliced_data[ex] = fill
    return sliced_data

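# Sketch of why np.float is passed as a *scalar* type above: scalar types
# only force a minimal upcast by kind, so float32 data stays float32 while
# integer data is promoted to a float wide enough to hold it. (Runs on
# NumPy versions that still ship find_common_type; it was deprecated in 1.25.)
import numpy as np

assert np.find_common_type([np.float32], [float]) == np.dtype('float32')
assert np.find_common_type([np.int32], [float]) == np.dtype('float64')
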
def _common_dtype(x, y):
    dtype = np.find_common_type([x.dtype, y.dtype], [])
    if x.dtype != dtype:
        x = x.astype(dtype)
    if y.dtype != dtype:
        y = y.astype(dtype)
    return x, y

def __init__(self, c1, c2, line_offset=0):
    """Create the comparator instance

    @param offset: a tuple containing the spatial offset of the smaller cube
    inside the larger cube as (line_offset, sample_offset)
    """
    # Set up the cube attributes so cube1 holds the bigger cube
    if c1.lines >= c2.lines and c1.samples >= c2.samples and c1.bands == c2.bands:
        self.cube1 = c1
        self.cube2 = c2
    elif c2.lines > c1.lines and c2.samples > c1.samples and c1.bands == c2.bands:
        self.cube1 = c2
        self.cube2 = c1
    else:
        raise ValueError("Can't determine which cube is supposed to be the "
                         "subset of the other: if cubes aren't the same size, "
                         "one cube must be a spatial subset of the other and "
                         "both must have the same number of bands")

    # common dimensions are taken from the smaller cube
    self.lines = self.cube2.lines
    self.bands = self.cube2.bands
    self.samples = self.cube2.samples

    if line_offset > 0:
        self.line_offset = line_offset
    else:
        self.line_offset = 0

    # Parameters common to both cubes
    self.bbl = self.cube1.getBadBandList(self.cube2)

    # The data type that can hold both types without losing precision
    self.dtype = numpy.find_common_type(
        [self.cube1.data_type, self.cube2.data_type], [])

    self.histogram = None
    self.hashPrintCount = 100000

def mean_images(list_spatial_images, list_spatial_masks=None,
                param_str_1=MEANIMAGES_DEFAULT, param_str_2=None):
    """
    Mean image algorithms.

    Parameters
    ----------
    :param list list_spatial_images: input list of *SpatialImage* (grayscale)
    :param list list_spatial_masks: optional, input list of *SpatialImage* (binary)
    :param str param_str_1: MEANIMAGES_DEFAULT, by default a mean image is computed
    :param str param_str_2: optional, optional parameters

    Returns
    -------
    :return: *SpatialImage* output image

    Example
    -------
    >>> from timagetk.util import data_path
    >>> from timagetk.components import imread
    >>> from timagetk.algorithms import mean_images
    >>> img_path = data_path('time_0_cut.inr')
    >>> input_image = imread(img_path)
    >>> output_image = mean_images([input_image, input_image, input_image])
    """
    conds = [isinstance(val, SpatialImage) for val in list_spatial_images]
    if False not in conds:
        dtype_list = [sp_img.dtype for sp_img in list_spatial_images]
        comm_type = np.find_common_type(dtype_list, [])

        if list_spatial_masks is None:
            mask_ptr = None
        else:
            list_c_vt_spatial_masks = POINTER(_VT_IMAGE) * len(list_spatial_images)
            c_input_masks = []
            for spatial_mask in list_spatial_masks:
                vt_input_mask = vt_image(spatial_mask)
                c_input_masks.append(vt_input_mask.get_vt_image())
            mask_ptr = list_c_vt_spatial_masks(
                *[pointer(c_input_mask) for c_input_mask in c_input_masks])

        list_c_vt_images = POINTER(_VT_IMAGE) * len(list_spatial_images)
        c_input_images = []
        for spatial_image in list_spatial_images:
            vt_input = vt_image(spatial_image)
            c_input_images.append(vt_input.get_vt_image())
        sp_img_ptr = list_c_vt_images(*[pointer(c_input) for c_input in c_input_images])

        vt_res = new_vt_image(list_spatial_images[0], dtype=comm_type)
        rvalue = libvtexec.API_meanImages(sp_img_ptr, mask_ptr,
                                          len(list_spatial_images),
                                          vt_res.c_ptr, param_str_1, param_str_2)
        out_sp_img = return_value(vt_res.get_spatial_image(), rvalue)
        return out_sp_img
    else:
        raise TypeError('Input images must be a list of SpatialImage')

def find_best_blas_type(arrays=(), dtype=None):
    """Find best-matching BLAS/LAPACK type.

    Arrays are used to determine the optimal prefix of BLAS routines.

    Parameters
    ----------
    arrays : sequence of ndarrays, optional
        Arrays can be given to determine optimal prefix of BLAS
        routines. If not given, double-precision routines will be
        used, otherwise the most generic type in arrays will be used.
    dtype : str or dtype, optional
        Data-type specifier. Not used if `arrays` is non-empty.

    Returns
    -------
    prefix : str
        BLAS/LAPACK prefix character.
    dtype : dtype
        Inferred Numpy data type.
    prefer_fortran : bool
        Whether to prefer Fortran order routines over C order.

    Examples
    --------
    >>> import scipy.linalg.blas as bla
    >>> a = np.random.rand(10,15)
    >>> b = np.asfortranarray(a)  # Change the memory layout order
    >>> bla.find_best_blas_type((a,))
    ('d', dtype('float64'), False)
    >>> bla.find_best_blas_type((a*1j,))
    ('z', dtype('complex128'), False)
    >>> bla.find_best_blas_type((b,))
    ('d', dtype('float64'), True)

    """
    dtype = _np.dtype(dtype)
    prefer_fortran = False

    if arrays:
        # use the most generic type in arrays
        dtypes = [ar.dtype for ar in arrays]
        dtype = _np.find_common_type(dtypes, ())
        try:
            index = dtypes.index(dtype)
        except ValueError:
            index = 0
        if arrays[index].flags['FORTRAN']:
            # prefer Fortran for leading array with column major order
            prefer_fortran = True

    prefix = _type_conv.get(dtype.char, 'd')
    if dtype.char == 'G':
        # complex256 -> complex128 (i.e., C long double -> C double)
        dtype = _np.dtype('D')
    elif dtype.char not in 'fdFD':
        dtype = _np.dtype('d')

    return prefix, dtype, prefer_fortran

def __mul__(self, other):
    '''
    Overloaded multiplication (*) operator, which supports the
    multiplication of an optstr with a scalar.
    '''
    result = copy(self)
    result[0] = result[0] * other
    result.dtype = np.find_common_type([result.dtype, np.asarray(other).dtype], [])
    return result

def forward_cpu(self, x):
    a, b = x
    batch_size = a.shape[0]
    shape = self._output_shape(a, b)
    ret_dtype = numpy.find_common_type([a.dtype, b.dtype], [])
    ret = numpy.empty(shape, dtype=ret_dtype)
    for i in six.moves.range(batch_size):
        ret[i] = _matmul(a[i], b[i], transa=self.transa, transb=self.transb)
    return (ret,)

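# Sketch of the output-dtype choice in forward_cpu above, with plain NumPy
# and made-up mixed-precision batches:
import numpy

_a = numpy.zeros((2, 3, 4), dtype=numpy.float32)
_b = numpy.zeros((2, 4, 5), dtype=numpy.float64)
assert numpy.find_common_type([_a.dtype, _b.dtype], []) == numpy.dtype('float64')
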
def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
             check_input=True):
    """Aux function used at beginning of fit in linear models"""
    n_samples, n_features = X.shape

    if sparse.isspmatrix(X):
        # copy is not needed here as X is not modified inplace when X is sparse
        precompute = False
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize,
            copy=False, return_mean=True, check_input=check_input)
    else:
        # copy was done in fit if necessary
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize,
            copy=copy, check_input=check_input)
    if hasattr(precompute, '__array__') and (
            fit_intercept and not np.allclose(X_offset, np.zeros(n_features))
            or normalize and not np.allclose(X_scale, np.ones(n_features))):
        warnings.warn("Gram matrix was provided but X was centered"
                      " to fit intercept, "
                      "or X was normalized : recomputing Gram matrix.",
                      UserWarning)
        # recompute Gram
        precompute = 'auto'
        Xy = None

    # precompute if n_samples > n_features
    if isinstance(precompute, str) and precompute == 'auto':
        precompute = (n_samples > n_features)

    if precompute is True:
        # make sure that the 'precompute' array is contiguous.
        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,
                              order='C')
        np.dot(X.T, X, out=precompute)

    if not hasattr(precompute, '__array__'):
        Xy = None  # cannot use Xy if precompute is not Gram

    if hasattr(precompute, '__array__') and Xy is None:
        common_dtype = np.find_common_type([X.dtype, y.dtype], [])
        if y.ndim == 1:
            # Xy is 1d, make sure it is contiguous.
            Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')
            np.dot(X.T, y, out=Xy)
        else:
            # Make sure that Xy is always F contiguous even if X or y are not
            # contiguous: the goal is to make it fast to extract the data for a
            # specific target.
            n_targets = y.shape[1]
            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,
                          order='F')
            np.dot(y.T, X, out=Xy.T)

    return X, y, X_offset, y_offset, X_scale, precompute, Xy

def cholesky(a):
    '''Cholesky decomposition.

    Decompose a given two-dimensional square matrix into ``L * L.T``,
    where ``L`` is a lower-triangular matrix and ``.T`` is a conjugate
    transpose operator. Note that in the current implementation ``a`` must be
    a real matrix, and only float32 and float64 are supported.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    .. seealso:: :func:`numpy.linalg.cholesky`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    _assert_cupy_array(a)
    _assert_rank2(a)
    _assert_nd_squareness(a)

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    x = a.astype(dtype, copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    if dtype == 'f':
        buffersize = cusolver.spotrf_bufferSize(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        cusolver.spotrf(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:  # dtype == 'd'
        buffersize = cusolver.dpotrf_bufferSize(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        cusolver.dpotrf(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
            workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'The leading minor of order {} '
            'is not positive definite'.format(status))
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')
    _tril(x, k=0)
    return x

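# The "cast to float32 or float64" idiom above, sketched with plain NumPy:
# promoting against 'f' (float32) bumps low-precision and integer inputs to
# a real floating dtype without touching inputs that are already 'f' or 'd'.
import numpy

assert numpy.find_common_type(('e', 'f'), ()).char == 'f'  # float16 -> float32
assert numpy.find_common_type(('i', 'f'), ()).char == 'd'  # int32   -> float64
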
def _matvec(self, v, out, nmax):
    v = np.asarray(v).ravel()
    self._validatein(v.shape)
    if out is None:
        out = np.zeros(v.size * self.shape[0] // self.shape[1],
                       np.find_common_type([self.dtype, v.dtype], []))
    elif not isinstance(out, np.ndarray):
        raise TypeError('The output array is not an ndarray.')
    elif not out.flags.contiguous:
        raise ValueError('The output array is not contiguous.')
    elif out.size != v.size * self.shape[0] // self.shape[1]:
        raise ValueError(
            "The output size '{0}' is incompatible with the number of rows"
            " of the sparse matrix '{1}'.".format(out.size, self.shape[0]))
    else:
        out = out.ravel()
        self._validateout(out.shape)

    di = self.data.index.dtype
    ds = self.dtype
    dv = v.dtype
    if str(ds) not in ('float32', 'float64') or \
       str(di) not in ('int32', 'int64'):
        return v, out, False

    if dv.kind != 'f' or dv.type in (np.float16, np.float128) or \
       dv.type is np.float32 and ds.type is np.float64:
        v = v.astype(self.dtype)
        dv = self.dtype
    if out.dtype != dv or not out.flags.contiguous:
        out_ = np.empty(out.size, dtype=dv)
        out_[...] = out
    else:
        out_ = out

    flib_id = self._flib_id
    if isinstance(self, (FSCMatrix, FSRMatrix)):
        extra_size = v.size // self.shape[1]
        if extra_size > 1:
            flib_id += '_homothety'
    f = '{0}_matvec_i{1}_r{2}_v{3}'.format(
        flib_id, di.itemsize, ds.itemsize, dv.itemsize)
    func = getattr(fsp, f)
    m = self.data.ravel().view(np.int8)
    if flib_id.endswith('_homothety'):
        func(m, v, out_, nmax, self.shape[1], self.shape[0], extra_size)
    elif flib_id.endswith('_block'):
        func(m, v, out_, nmax, self.shape[1] // self.block_shape[1],
             self.shape[0] // self.block_shape[0],
             self.block_shape[0], self.block_shape[1])
    else:
        func(m, v, out_, nmax)

    if out.dtype != dv:
        out[...] = out_

    return v, out, True

def concatenate(tup, axis=0):
    """Joins arrays along an axis.

    Args:
        tup (sequence of arrays): Arrays to be joined. All of these should
            have same dimensionalities except the specified axis.
        axis (int): The axis to join arrays along.

    Returns:
        cupy.ndarray: Joined array.

    .. seealso:: :func:`numpy.concatenate`

    """
    ndim = None
    shape = None
    for a in tup:
        if not isinstance(a, cupy.ndarray):
            raise TypeError('Only cupy arrays can be concatenated')
        if a.ndim == 0:
            raise TypeError('zero-dimensional arrays cannot be concatenated')

        if ndim is None:
            ndim = a.ndim
            shape = list(a.shape)
            axis = _get_positive_axis(a.ndim, axis)
            continue

        if a.ndim != ndim:
            raise ValueError(
                'All arrays to concatenate must have the same ndim')
        if any(i != axis and shape[i] != a.shape[i]
               for i in six.moves.range(ndim)):
            raise ValueError(
                'All arrays must have same shape except the axis to '
                'concatenate')
        shape[axis] += a.shape[axis]

    if ndim is None:
        raise ValueError('Cannot concatenate from empty tuple')

    dtype = numpy.find_common_type([a.dtype for a in tup], [])
    ret = cupy.empty(shape, dtype=dtype)

    skip = (slice(None),) * axis
    i = 0
    for a in tup:
        aw = a.shape[axis]
        ret[skip + (slice(i, i + aw),)] = a
        i += aw

    return ret

def __init__(self, optstrs, degfres):
    '''
    Constructor.

    Parameters
    ----------
    optstrs : list of OptStr
        The optstrs contained in the mpo.
    degfres : DegFreTree
        The tree of the site degrees of freedom.
    '''
    layer = degfres.layers[degfres.level(optstrs[0][0].site.identifier) - 1]
    table, sites, bonds = degfres.table(layer), degfres.labels('S', layer), degfres.labels('O', layer)
    for optstr in optstrs:
        optstr.connect(degfres)
    optstrs.sort(key=lambda optstr: (len(optstr), table[optstr[0].site.identifier], tuple(repr(opt) for opt in optstr)))
    rows, cols = [], []
    for i, site in enumerate(sites):
        self.append(
            np.array([[Opt.zero(site), Opt.identity(site)]]) if i == 0 else
            np.array([[Opt.identity(site)], [Opt.zero(site)]]) if i == len(sites) - 1 else
            np.array([[Opt.identity(site), Opt.zero(site)], [Opt.zero(site), Opt.identity(site)]])
        )
        rows.append(1 if i == 0 else 2)
        cols.append(1 if i == len(sites) - 1 else 2)
    for optstr in optstrs:
        if len(optstr) == 1:
            self[table[optstr[0].site.identifier]][-1, 0] += optstr[0]
        else:
            for i, opt in enumerate(optstr):
                pos = table[opt.site.identifier]
                if i == 0:
                    col = [Opt.zero(opt.site)] * rows[pos]
                    col[-1] = opt
                    self[pos] = np.insert(self[pos], -1, col, axis=1)
                    cols[pos] += 1
                elif i < len(optstr) - 1:
                    row = [Opt.zero(opt.site)] * cols[pos]
                    self[pos] = np.insert(self[pos], -1, row, axis=0)
                    rows[pos] += 1
                    col = [Opt.zero(opt.site)] * rows[pos]
                    col[-2] = opt
                    self[pos] = np.insert(self[pos], -1, col, axis=1)
                    cols[pos] += 1
                else:
                    row = [Opt.zero(opt.site)] * cols[pos]
                    row[0] = opt
                    self[pos] = np.insert(self[pos], -1, row, axis=0)
                    rows[pos] += 1
    self.sites = sites
    self.bonds = bonds
    self.dtype = np.find_common_type([optstr.dtype for optstr in optstrs], [])

def full_transform(Matrix, Tensor):
    """
    Transforms the Tensor to its representation in the new basis with the
    given transformation Matrix (though it appears to apply the transformed
    matrix rather than the matrix itself).
    """
    Matrix = np.array(Matrix)
    Tensor = np.array(Tensor)
    dtype = np.find_common_type([], [Matrix.dtype, Tensor.dtype])
    Tnew = np.zeros_like(Tensor, dtype=dtype)
    for ind in itertools.product(*map(range, Tensor.shape)):       # index i
        for inds in itertools.product(*map(range, Tensor.shape)):  # index j
            Tnew[ind] += Tensor[inds] * Matrix[inds, ind].prod()
    return Tnew

def find_common_type(types):
    """
    Find a common data type among the given dtypes.

    Parameters
    ----------
    types : list of dtypes

    Returns
    -------
    pandas extension or numpy dtype

    See Also
    --------
    numpy.find_common_type

    """
    if len(types) == 0:
        raise ValueError('no types given')

    first = types[0]

    # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
    # => object
    if all(is_dtype_equal(first, t) for t in types[1:]):
        return first

    if any(isinstance(t, (PandasExtensionDtype, ExtensionDtype))
           for t in types):
        return np.object

    # take lowest unit
    if all(is_datetime64_dtype(t) for t in types):
        return np.dtype('datetime64[ns]')
    if all(is_timedelta64_dtype(t) for t in types):
        return np.dtype('timedelta64[ns]')

    # don't mix bool / int or float or complex
    # this is different from numpy, which casts bool with float/int as int
    has_bools = any(is_bool_dtype(t) for t in types)
    if has_bools:
        has_ints = any(is_integer_dtype(t) for t in types)
        has_floats = any(is_float_dtype(t) for t in types)
        has_complex = any(is_complex_dtype(t) for t in types)
        if has_ints or has_floats or has_complex:
            return np.object

    return np.find_common_type(types, [])

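# Sketch of where the pandas wrapper above deliberately diverges from plain
# numpy: numpy folds bool into the numeric type, while the wrapper returns
# object for bool mixed with int/float/complex.
import numpy as np

assert np.find_common_type([np.dtype(bool), np.dtype('int64')], []) == np.dtype('int64')
# ...whereas the wrapper above returns np.object for the same input.
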
def find_common_dtype(*args):
    '''Returns common dtype of numpy and scipy objects.

    Recognizes ndarray, spmatrix and LinearOperator. All other objects are
    ignored (most notably None).'''
    dtypes = []
    for arg in args:
        if type(arg) is numpy.ndarray or \
                isspmatrix(arg) or \
                isinstance(arg, LinearOperator):
            if hasattr(arg, 'dtype'):
                dtypes.append(arg.dtype)
            else:
                warnings.warn('object %s does not have a dtype.' % repr(arg))
    return numpy.find_common_type(dtypes, [])

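# Sketch of find_common_dtype (above); assumes scipy is available so that a
# sparse matrix can stand in for one of the recognized operand types.
import numpy
import scipy.sparse

_A = scipy.sparse.identity(3, dtype=numpy.float32)
_b = numpy.zeros(3, dtype=numpy.complex128)
assert find_common_dtype(_A, _b, None) == numpy.dtype('complex128')
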
def __init__(self, t, c, k, dtype=None):
    # Try to keep float32 if dtypes are available
    if dtype is None:
        types = [d.dtype for d in [c, t] if isinstance(d, np.ndarray)]
        dtype = np.find_common_type(types, [])
    if dtype is None:
        dtype = np.double
    dtype = _asvalid_dtype(dtype)
    k = _asvalid_k(k)
    t = _asvalid_t(t, k, dtype=dtype, copy=True)
    c = _asvalid_c(c, t, k, dtype=dtype, copy=True)

    self._dtype = dtype
    self._k = k
    self._t = t
    self._c = c

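# The double "if dtype is None" above is deliberate: with no ndarray inputs
# the type list is empty, and np.find_common_type([], []) returns None rather
# than raising, so the np.double fallback kicks in. A sketch:
import numpy as np

assert np.find_common_type([], []) is None
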
def __init__(self, op, alpha):
    dtype = _np.find_common_type([op.dtype], [type(alpha)])
    self._op = op
    self._alpha = alpha
    super().__init__(dtype, op.shape)

def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
             check_input=True):
    """Aux function used at beginning of fit in linear models"""
    n_samples, n_features = X.shape

    if sparse.isspmatrix(X):
        # copy is not needed here as X is not modified inplace when X is sparse
        precompute = False
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize,
            copy=False, return_mean=True, check_input=check_input)
    else:
        # copy was done in fit if necessary
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize,
            copy=copy, check_input=check_input)
    if hasattr(precompute, '__array__') and (
            fit_intercept and not np.allclose(X_offset, np.zeros(n_features))
            or normalize and not np.allclose(X_scale, np.ones(n_features))):
        warnings.warn(
            "Gram matrix was provided but X was centered"
            " to fit intercept, "
            "or X was normalized : recomputing Gram matrix.",
            UserWarning)
        # recompute Gram
        precompute = 'auto'
        Xy = None

    # precompute if n_samples > n_features
    if isinstance(precompute, str) and precompute == 'auto':
        precompute = (n_samples > n_features)

    if precompute is True:
        # make sure that the 'precompute' array is contiguous.
        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,
                              order='C')
        np.dot(X.T, X, out=precompute)

    if not hasattr(precompute, '__array__'):
        Xy = None  # cannot use Xy if precompute is not Gram

    if hasattr(precompute, '__array__') and Xy is None:
        common_dtype = np.find_common_type([X.dtype, y.dtype], [])
        if y.ndim == 1:
            # Xy is 1d, make sure it is contiguous.
            Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')
            np.dot(X.T, y, out=Xy)
        else:
            # Make sure that Xy is always F contiguous even if X or y are not
            # contiguous: the goal is to make it fast to extract the data for a
            # specific target.
            n_targets = y.shape[1]
            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,
                          order='F')
            np.dot(y.T, X, out=Xy.T)

    return X, y, X_offset, y_offset, X_scale, precompute, Xy

def inverse_transform(self, X):
    """Convert the data back to the original representation.

    In case unknown categories are encountered (all zero's in the
    one-hot encoding), ``None`` is used to represent this category.

    Parameters
    ----------
    X : array-like or sparse matrix, shape [n_samples, n_encoded_features]
        The transformed data.

    Returns
    -------
    X_tr : array-like, shape [n_samples, n_features]
        Inverse transformed array.

    """
    check_is_fitted(self, 'categories_')
    X = check_array(X, accept_sparse='csr')

    n_samples, _ = X.shape
    n_features = len(self.categories_)
    n_transformed_features = sum([len(cats) for cats in self.categories_])

    # validate shape of passed X
    msg = ("Shape of the passed X data is not correct. Expected {0} "
           "columns, got {1}.")
    if self.encoding == 'ordinal' and X.shape[1] != n_features:
        raise ValueError(msg.format(n_features, X.shape[1]))
    elif (self.encoding.startswith('onehot')
            and X.shape[1] != n_transformed_features):
        raise ValueError(msg.format(n_transformed_features, X.shape[1]))

    # create resulting array of appropriate dtype
    dt = np.find_common_type([cat.dtype for cat in self.categories_], [])
    X_tr = np.empty((n_samples, n_features), dtype=dt)

    if self.encoding == 'ordinal':
        for i in range(n_features):
            labels = X[:, i].astype('int64')
            X_tr[:, i] = self.categories_[i][labels]
    else:  # encoding == 'onehot' / 'onehot-dense'
        j = 0
        found_unknown = {}

        for i in range(n_features):
            n_categories = len(self.categories_[i])
            sub = X[:, j:j + n_categories]

            # for sparse X argmax returns 2D matrix, ensure 1D array
            labels = np.asarray(_argmax(sub, axis=1)).flatten()
            X_tr[:, i] = self.categories_[i][labels]

            if self.handle_unknown == 'ignore':
                # ignored unknown categories: we have a row of all zero's
                unknown = np.asarray(sub.sum(axis=1) == 0).flatten()
                if unknown.any():
                    found_unknown[i] = unknown

            j += n_categories

        # if ignored are found: potentially need to upcast result to
        # insert None values
        if found_unknown:
            if X_tr.dtype != object:
                X_tr = X_tr.astype(object)

            for idx, mask in found_unknown.items():
                X_tr[mask, idx] = None

    return X_tr

def test_scalar_wins3(self):
    # doesn't go up to 'f16' on purpose
    res = np.find_common_type(['u8', 'i8', 'i8'], ['f8'])
    assert_(res == 'f8')

def batched_gtsv(dl, d, du, B, algo='cyclic_reduction'):
    """Solves multiple tridiagonal systems (this is a bang method for B).

    Args:
        dl, d, du (cupy.ndarray): Lower, main and upper diagonal vectors with
            last-dim sizes of N-1, N and N-1, respectively. Only two
            dimensional inputs are supported currently. The first dim is the
            batch dim.
        B (cupy.ndarray): Right-hand side vectors. The first dim is the batch
            dim and the second dim is N.
        algo (str): algorithm, choose one from four algorithms;
            cyclic_reduction, cuThomas, LU_w_pivoting and QR. cuThomas is
            numerically unstable, and LU_w_pivoting is the LU algorithm with
            pivoting.
    """
    if algo not in ["cyclic_reduction", "cuThomas", "LU_w_pivoting", "QR"]:
        raise ValueError(f"Unknown algorithm [{algo}]")

    util._assert_cupy_array(dl)
    util._assert_cupy_array(d)
    util._assert_cupy_array(du)
    util._assert_cupy_array(B)
    if dl.ndim != 2 or d.ndim != 2 or du.ndim != 2 or B.ndim != 2:
        raise ValueError('dl, d, du and B must be 2-d arrays')

    batchsize = d.shape[0]
    if batchsize != dl.shape[0] or batchsize != du.shape[0] \
            or batchsize != B.shape[0]:
        raise ValueError(
            'The first dims of dl, du and B must match that of d.')
    N = d.shape[1]  # the size of the linear system
    if dl.shape[1] != N - 1 or du.shape[1] != N - 1 or B.shape[1] != N:
        raise ValueError(
            'The second dims of dl, du and B must match the second dim of d.')

    # the first element of dl must be zero
    padded_dl = cupy.ascontiguousarray(
        cupy.pad(dl, ((0, 0), (1, 0)), mode='constant', constant_values=0.0))
    # the last element of du must be zero
    padded_du = cupy.ascontiguousarray(
        cupy.pad(du, ((0, 0), (0, 1)), mode='constant', constant_values=0.0))
    # contiguous
    d = cupy.ascontiguousarray(d)
    B = cupy.ascontiguousarray(B)

    # Cast to float32 or float64
    if d.dtype == 'f' or d.dtype == 'd':
        dtype = d.dtype
    else:
        dtype = numpy.find_common_type((d.dtype, 'f'), ())

    handle = device.get_cusparse_handle()
    if dtype == 'f':
        if algo == "cyclic_reduction":
            gtsv2 = cusparse.sgtsv2StridedBatch
            get_buffer_size = cusparse.sgtsv2StridedBatch_bufferSizeExt

            buffer_size = numpy.empty(1, numpy.int32)
            get_buffer_size(handle, N, padded_dl.data.ptr, d.data.ptr,
                            padded_du.data.ptr, B.data.ptr, batchsize, N,
                            buffer_size.ctypes.data)
            buffer_size = int(buffer_size)
            buffer = cupy.zeros((buffer_size, ), dtype=cupy.uint8)
            gtsv2(handle, N, padded_dl.data.ptr, d.data.ptr,
                  padded_du.data.ptr, B.data.ptr, batchsize, N,
                  buffer.data.ptr)
        else:
            # interleaved-batch path for cuThomas / LU_w_pivoting / QR
            if algo == "cuThomas":
                algo_num = 0
            elif algo == "LU_w_pivoting":
                algo_num = 1
            elif algo == "QR":
                algo_num = 2
            else:
                raise ValueError

            gtsv2 = cusparse.sgtsvInterleavedBatch
            get_buffer_size = cusparse.sgtsvInterleavedBatch_bufferSizeExt

            buffer_size = get_buffer_size(handle, algo_num, N,
                                          padded_dl.data.ptr, d.data.ptr,
                                          padded_du.data.ptr, B.data.ptr,
                                          batchsize)
            buffer = cupy.zeros((buffer_size, ), dtype=cupy.uint8)
            gtsv2(handle, algo_num, N, padded_dl.data.ptr, d.data.ptr,
                  padded_du.data.ptr, B.data.ptr, batchsize, buffer.data.ptr)
    else:
        raise NotImplementedError
    return B

def _pre_fit(
    X,
    y,
    Xy,
    precompute,
    normalize,
    fit_intercept,
    copy,
    check_input=True,
    sample_weight=None,
):
    """Function used at beginning of fit in linear models with L1 or L0 penalty.

    This function applies _preprocess_data and additionally computes the gram matrix
    `precompute` as needed as well as `Xy`.

    Parameters
    ----------
    order : 'F', 'C' or None, default=None
        Whether X and y will be forced to be fortran or c-style. Only relevant
        if sample_weight is not None.
    """
    n_samples, n_features = X.shape

    if sparse.isspmatrix(X):
        # copy is not needed here as X is not modified inplace when X is sparse
        precompute = False
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy=False,
            check_input=check_input,
            sample_weight=sample_weight,
        )
    else:
        # copy was done in fit if necessary
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy=copy,
            check_input=check_input,
            sample_weight=sample_weight,
        )
    # Rescale only in dense case. Sparse cd solver directly deals with
    # sample_weight.
    if sample_weight is not None:
        # This triggers copies anyway.
        X, y, _ = _rescale_data(X, y, sample_weight=sample_weight)

    # FIXME: 'normalize' to be removed in 1.2
    if hasattr(precompute, "__array__"):
        if (
            fit_intercept
            and not np.allclose(X_offset, np.zeros(n_features))
            or normalize
            and not np.allclose(X_scale, np.ones(n_features))
        ):
            warnings.warn(
                "Gram matrix was provided but X was centered to fit "
                "intercept, or X was normalized : recomputing Gram matrix.",
                UserWarning,
            )
            # recompute Gram
            precompute = "auto"
            Xy = None
        elif check_input:
            # If we're going to use the user's precomputed gram matrix, we
            # do a quick check to make sure its not totally bogus.
            _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale)

    # precompute if n_samples > n_features
    if isinstance(precompute, str) and precompute == "auto":
        precompute = n_samples > n_features

    if precompute is True:
        # make sure that the 'precompute' array is contiguous.
        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype, order="C")
        np.dot(X.T, X, out=precompute)

    if not hasattr(precompute, "__array__"):
        Xy = None  # cannot use Xy if precompute is not Gram

    if hasattr(precompute, "__array__") and Xy is None:
        common_dtype = np.find_common_type([X.dtype, y.dtype], [])
        if y.ndim == 1:
            # Xy is 1d, make sure it is contiguous.
            Xy = np.empty(shape=n_features, dtype=common_dtype, order="C")
            np.dot(X.T, y, out=Xy)
        else:
            # Make sure that Xy is always F contiguous even if X or y are not
            # contiguous: the goal is to make it fast to extract the data for a
            # specific target.
            n_targets = y.shape[1]
            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype, order="F")
            np.dot(y.T, X, out=Xy.T)

    return X, y, X_offset, y_offset, X_scale, precompute, Xy

def test_scalar_wins(self):
    res = np.find_common_type(['f4', 'f4', 'i2'], ['c8'])
    assert_(res == 'c8')

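# The two tests above hinge on find_common_type's array/scalar asymmetry:
# scalar types contribute only their *kind*, so a complex scalar lifts an
# f4 array to c8 rather than to the scalar's own precision. A sketch:
import numpy as np

assert np.find_common_type(['f4'], ['c16']) == np.dtype('complex64')
assert np.find_common_type(['f8'], ['c16']) == np.dtype('complex128')
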
def add_categories(self, new_categories, inplace=False):
    """
    Add new categories.

    `new_categories` will be included at the last/highest place in the
    categories and will be unused directly after this call.

    Parameters
    ----------
    new_categories : category or list-like of category
        The new categories to be included.
    inplace : bool, default False
        Whether or not to add the categories inplace
        or return a copy of this categorical with
        added categories.

    Returns
    -------
    cat
        Categorical with new categories added or
        None if inplace.

    Examples
    --------
    >>> import cudf
    >>> s = cudf.Series([1, 2], dtype="category")
    >>> s
    0    1
    1    2
    dtype: category
    Categories (2, int64): [1, 2]
    >>> s.cat.add_categories([0, 3, 4])
    0    1
    1    2
    dtype: category
    Categories (5, int64): [1, 2, 0, 3, 4]
    >>> s
    0    1
    1    2
    dtype: category
    Categories (2, int64): [1, 2]
    >>> s.cat.add_categories([0, 3, 4], inplace=True)
    >>> s
    0    1
    1    2
    dtype: category
    Categories (5, int64): [1, 2, 0, 3, 4]
    """
    old_categories = self._column.categories
    new_categories = column.as_column(
        new_categories,
        dtype=old_categories.dtype if len(new_categories) == 0 else None,
    )

    if is_mixed_with_object_dtype(old_categories, new_categories):
        raise TypeError(
            f"cudf does not support adding categories with existing "
            f"categories of dtype `{old_categories.dtype}` and new "
            f"categories of dtype `{new_categories.dtype}`, please "
            f"type-cast new_categories to the same type as "
            f"existing categories.")
    common_dtype = np.find_common_type(
        [old_categories.dtype, new_categories.dtype], [])

    new_categories = new_categories.astype(common_dtype)
    old_categories = old_categories.astype(common_dtype)

    if old_categories.isin(new_categories).any():
        raise ValueError("new categories must not include old categories")

    new_categories = old_categories.append(new_categories)
    out_col = self._column
    if not self._categories_equal(new_categories):
        out_col = self._set_categories(old_categories, new_categories)

    return self._return_or_inplace(out_col, inplace=inplace)

def inverse_transform(self, X):
    """
    Convert the data back to the original representation.

    When unknown categories are encountered (all zeros in the
    one-hot encoding), ``None`` is used to represent this category. If the
    feature with the unknown category has a dropped category, the dropped
    category will be its inverse.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape \
            (n_samples, n_encoded_features)
        The transformed data.

    Returns
    -------
    X_tr : ndarray of shape (n_samples, n_features)
        Inverse transformed array.
    """
    check_is_fitted(self)
    X = check_array(X, accept_sparse="csr")

    n_samples, _ = X.shape
    n_features = len(self.categories_)
    if self.drop_idx_ is None:
        n_transformed_features = sum(
            len(cats) for cats in self.categories_)
    else:
        n_transformed_features = sum(
            len(cats) - 1 if to_drop is not None else len(cats)
            for cats, to_drop in zip(self.categories_, self.drop_idx_))

    # validate shape of passed X
    msg = (
        "Shape of the passed X data is not correct. Expected {0} columns, got {1}."
    )
    if X.shape[1] != n_transformed_features:
        raise ValueError(msg.format(n_transformed_features, X.shape[1]))

    # create resulting array of appropriate dtype
    dt = np.find_common_type([cat.dtype for cat in self.categories_], [])
    X_tr = np.empty((n_samples, n_features), dtype=dt)

    j = 0
    found_unknown = {}

    for i in range(n_features):
        if self.drop_idx_ is None or self.drop_idx_[i] is None:
            cats = self.categories_[i]
        else:
            cats = np.delete(self.categories_[i], self.drop_idx_[i])
        n_categories = len(cats)

        # Only happens if there was a column with a unique
        # category. In this case we just fill the column with this
        # unique category value.
        if n_categories == 0:
            X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]
            j += n_categories
            continue
        sub = X[:, j:j + n_categories]
        # for sparse X argmax returns 2D matrix, ensure 1D array
        labels = np.asarray(sub.argmax(axis=1)).flatten()
        X_tr[:, i] = cats[labels]

        if self.handle_unknown == "ignore":
            unknown = np.asarray(sub.sum(axis=1) == 0).flatten()
            # ignored unknown categories: we have a row of all zero
            if unknown.any():
                # if categories were dropped then unknown categories will
                # be mapped to the dropped category
                if self.drop_idx_ is None or self.drop_idx_[i] is None:
                    found_unknown[i] = unknown
                else:
                    X_tr[unknown, i] = self.categories_[i][self.drop_idx_[i]]
        else:
            dropped = np.asarray(sub.sum(axis=1) == 0).flatten()
            if dropped.any():
                if self.drop_idx_ is None:
                    all_zero_samples = np.flatnonzero(dropped)
                    raise ValueError(
                        f"Samples {all_zero_samples} can not be inverted "
                        "when drop=None and handle_unknown='error' "
                        "because they contain all zeros")
                # we can safely assume that all of the nulls in each column
                # are the dropped value
                X_tr[dropped, i] = self.categories_[i][self.drop_idx_[i]]

        j += n_categories

    # if ignored are found: potentially need to upcast result to
    # insert None values
    if found_unknown:
        if X_tr.dtype != object:
            X_tr = X_tr.astype(object)

        for idx, mask in found_unknown.items():
            X_tr[mask, idx] = None

    return X_tr

def extract_raster(self, src, return_array=False, progress=False):
    """Sample a Raster object by an aligned raster of labelled pixels.

    Parameters
    ----------
    src: rasterio DatasetReader
        Single band raster containing labelled pixels as an open
        rasterio DatasetReader object.

    return_array : bool (opt), default=False
        By default the extracted pixel values are returned as a
        geopandas.GeoDataFrame. If `return_array=True` then the extracted
        pixel values are returned as a tuple of numpy.ndarrays.

    progress : bool (opt), default=False
        Show a progress bar for extraction.

    Returns
    -------
    geopandas.GeoDataFrame
        Geodataframe containing extracted data as point features if
        `return_array=False`.

    tuple with three items if `return_array=True`
        - numpy.ndarray
            Numpy masked array of extracted raster values, typically 2d.
        - numpy.ndarray
            1d numpy masked array of labelled samples.
        - numpy.ndarray
            2d numpy masked array of row and column indexes of training
            pixels.
    """
    # open response raster and get labelled pixel indices and values
    arr = src.read(1, masked=True)
    rows, cols = np.nonzero(~arr.mask)
    xys = np.transpose(rasterio.transform.xy(src.transform, rows, cols))
    ys = arr.data[rows, cols]

    # extract Raster object values at row, col indices
    dtype = np.find_common_type([np.float32], self.dtypes)
    X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

    if progress is True:
        disable_tqdm = False
    else:
        disable_tqdm = True

    for i, (layer, pbar) in enumerate(
            zip(self.iloc, tqdm(self.iloc, total=self.count,
                                disable=disable_tqdm))):
        sampler = sample_gen(dataset=layer.ds, xy=xys, indexes=layer.bidx,
                             masked=True)
        v = np.ma.asarray([i for i in sampler])
        X[:, i] = v.flatten()

    # summarize data
    if return_array is False:
        column_names = ["value"] + self.names
        gdf = pd.DataFrame(data=np.ma.column_stack((ys, X)),
                           columns=column_names)
        gdf["geometry"] = list(zip(xys[:, 0], xys[:, 1]))
        gdf["geometry"] = gdf["geometry"].apply(Point)
        gdf = gpd.GeoDataFrame(gdf, geometry="geometry", crs=self.crs)
        return gdf

    return X, ys, xys

def process(self, thread_index, i1, i2, filter_mask, selection_masks, blocks):
    class Info(object):
        pass
    info = Info()
    info.i1 = i1
    info.i2 = i2
    info.first = i1 == 0
    info.last = i2 == self.df.length_unfiltered()
    info.size = i2 - i1

    masks = [np.ma.getmaskarray(block) for block in blocks
             if np.ma.isMaskedArray(block)]
    blocks = [block.data if np.ma.isMaskedArray(block) else block
              for block in blocks]
    blocks = [np.asarray(k) for k in blocks]
    mask = None

    # blocks = [as_flat_float(block) for block in blocks]
    if len(blocks) != 0:
        dtype = np.find_common_type([block.dtype for block in blocks], [])
        if dtype.str in ">f8 <f8 =f8":
            statistic_function = vaex.vaexfast.statisticNd_f8
        elif dtype.str in ">f4 <f4 =f4":
            statistic_function = vaex.vaexfast.statisticNd_f4
        elif dtype.str in ">i8 <i8 =i8":
            dtype = np.dtype(np.float64)
            statistic_function = vaex.vaexfast.statisticNd_f8
        else:
            dtype = np.dtype(np.float32)
            statistic_function = vaex.vaexfast.statisticNd_f4
        # print(dtype, statistic_function, histogram2d)

        if masks:
            mask = masks[0].copy()
            for other in masks[1:]:
                mask |= other
            blocks = [block[~mask] for block in blocks]

    this_thread_grid = self.grid
    for i, selection in enumerate(self.selections):
        if selection:
            selection_mask = selection_masks[i]
            if selection_mask is None:
                raise ValueError(
                    "performing operation on selection while no selection present")
            if mask is not None:
                selection_mask = selection_mask[~mask]
            selection_blocks = [block[selection_mask] for block in blocks]
        else:
            selection_blocks = [block for block in blocks]
        little_endians = len([k for k in selection_blocks
                              if k.dtype != str and k.dtype.byteorder in ["<", "="]])
        if not ((len(selection_blocks) == little_endians) or little_endians == 0):
            def _to_native(ar):
                if ar.dtype == str:
                    return ar  # strings are always fine
                if ar.dtype.byteorder not in ["<", "="]:
                    dtype = ar.dtype.newbyteorder()
                    return ar.astype(dtype)
                else:
                    return ar
            selection_blocks = [_to_native(k) for k in selection_blocks]
        # subblock_weight = None
        subblock_weights = selection_blocks[len(self.expressions):]
        selection_blocks = list(selection_blocks[:len(self.expressions)])
        if len(selection_blocks) == 0 and subblock_weights == []:
            if self.op == vaex.tasks.OP_ADD1:
                # special case for counting '*' (i.e. the number of rows)
                if selection or self.df.filtered:
                    this_thread_grid[i][0] += np.sum(selection_mask)
                else:
                    this_thread_grid[i][0] += i2 - i1
            else:
                raise ValueError("Nothing to compute for OP %s" % self.op.code)
        # special case for counting string values etc
        elif len(selection_blocks) == 0 and len(subblock_weights) == 1 \
                and self.op in [vaex.tasks.OP_COUNT] \
                and (subblock_weights[0].dtype == str
                     or subblock_weights[0].dtype.kind not in 'biuf'):
            weight = subblock_weights[0]
            mask = None
            if weight.dtype != str:
                if weight.dtype.kind == 'O':
                    mask = vaex.strings.StringArray(weight).mask()
            else:
                mask = weight.get_mask()
            if selection or self.df.filtered:
                if mask is not None:
                    this_thread_grid[i][0] += np.sum(~mask)
                else:
                    this_thread_grid[i][0] += np.sum(selection_mask)
            else:
                if mask is not None:
                    this_thread_grid[i][0] += len(mask) - mask.sum()
                else:
                    this_thread_grid[i][0] += len(weight)
        else:
            selection_blocks = [as_flat_array(block, dtype)
                                for block in selection_blocks]
            subblock_weights = [as_flat_array(block, dtype)
                                for block in subblock_weights]
            statistic_function(selection_blocks, subblock_weights,
                               this_thread_grid[i], self.minima, self.maxima,
                               self.op.code, self.edges)
    return i2 - i1

def extract_vector(self, gdf, return_array=False, progress=False):
    """Sample a Raster/RasterLayer using a geopandas GeoDataframe containing
    points, lines or polygon features.

    Parameters
    ----------
    gdf: geopandas.GeoDataFrame
        Containing either point, line or polygon geometries. Overlapping
        geometries will cause the same pixels to be sampled.

    return_array : bool (opt), default=False
        By default the extracted pixel values are returned as a
        geopandas.GeoDataFrame. If `return_array=True` then the extracted
        pixel values are returned as a tuple of numpy.ndarrays.

    progress : bool (opt), default=False
        Show a progress bar for extraction.

    Returns
    -------
    geopandas.GeoDataframe
        Containing extracted data as point geometries if
        `return_array=False`.

    tuple
        A tuple (geodataframe index, extracted values, coordinates) of the
        extracted raster values as a masked array and the coordinates of the
        extracted pixels if `return_array=True`.
    """
    # rasterize polygon and line geometries
    if all(gdf.geom_type == "Polygon") or all(gdf.geom_type == "LineString"):
        shapes = [(geom, val) for geom, val in zip(gdf.geometry, gdf.index)]
        arr = np.ma.zeros((self.height, self.width))
        arr[:] = -99999

        arr = features.rasterize(
            shapes=shapes,
            fill=-99999,
            out=arr,
            transform=self.transform,
            all_touched=True,
        )

        ids = arr[np.nonzero(arr != -99999)]
        ids = ids.astype("int")
        rows, cols = np.nonzero(arr != -99999)
        xys = rasterio.transform.xy(transform=self.transform, rows=rows,
                                    cols=cols)
        xys = np.transpose(xys)

    elif all(gdf.geom_type == "Point"):
        ids = gdf.index.values
        xys = gdf.bounds.iloc[:, 2:].values

    # extract raster pixels
    dtype = np.find_common_type([np.float32], self.dtypes)
    X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

    if progress is True:
        disable_tqdm = False
    else:
        disable_tqdm = True

    for i, (layer, pbar) in enumerate(
            zip(self.iloc, tqdm(self.iloc, total=self.count,
                                disable=disable_tqdm))):
        sampler = sample_gen(dataset=layer.ds, xy=xys, indexes=layer.bidx,
                             masked=True)
        v = np.ma.asarray([i for i in sampler])
        X[:, i] = v.flatten()

    # return as geopandas array as default (or numpy arrays)
    if return_array is False:
        X = pd.DataFrame(np.ma.column_stack((ids, X)),
                         columns=["id"] + self.names)
        X.id = X.id.astype("int")
        X["geometry"] = list(zip(xys[:, 0], xys[:, 1]))
        X["geometry"] = X["geometry"].apply(Point)
        X = gpd.GeoDataFrame(X, geometry="geometry", crs=self.crs)
        return X

    return ids, X, xys

def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
    """
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks may
    have different inferred categories.
    """
    names = list(chunks[0].keys())
    warning_columns = []

    result = {}
    for name in names:
        arrs = [chunk.pop(name) for chunk in chunks]
        # Check each arr for consistent types.
        dtypes = {a.dtype for a in arrs}
        # TODO: shouldn't we exclude all EA dtypes here?
        numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
        if len(numpy_dtypes) > 1:
            # error: Argument 1 to "find_common_type" has incompatible type
            # "Set[Any]"; expected "Sequence[Union[dtype[Any], None, type,
            # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any,
            # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]"
            common_type = np.find_common_type(
                numpy_dtypes,  # type: ignore[arg-type]
                [],
            )
            if common_type == object:
                warning_columns.append(str(name))

        dtype = dtypes.pop()
        if is_categorical_dtype(dtype):
            result[name] = union_categoricals(arrs, sort_categories=False)
        else:
            if isinstance(dtype, ExtensionDtype):
                # TODO: concat_compat?
                array_type = dtype.construct_array_type()
                # error: Argument 1 to "_concat_same_type" of "ExtensionArray"
                # has incompatible type "List[Union[ExtensionArray, ndarray]]";
                # expected "Sequence[ExtensionArray]"
                result[name] = array_type._concat_same_type(
                    arrs  # type: ignore[arg-type]
                )
            else:
                # Argument 1 to "concatenate" has incompatible type
                # "List[Union[ExtensionArray, ndarray[Any, Any]]]"; expected
                # "Union[_SupportsArray[dtype[Any]],
                # Sequence[_SupportsArray[dtype[Any]]],
                # Sequence[Sequence[_SupportsArray[dtype[Any]]]],
                # Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]],
                # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]]]]"
                result[name] = np.concatenate(arrs)  # type: ignore[arg-type]

    if warning_columns:
        warning_names = ",".join(warning_columns)
        warning_message = " ".join([
            f"Columns ({warning_names}) have mixed types. "
            f"Specify dtype option on import or set low_memory=False."
        ])
        warnings.warn(warning_message, DtypeWarning,
                      stacklevel=find_stack_level())
    return result

def median(x, mask=None, axis=None, out=None):
    """
    Return median of array.

    Parameters
    ----------
    x : sequence or ndarray
        The input array. NaN values are discarded. Complex and floats of
        precision greater than 64 are not handled.
    mask : ndarray, optional
        Boolean array mask whose True values indicate an element to be
        discarded.
    axis : {None, int}, optional
        Axis along which the medians are computed. The default (axis=None)
        is to compute the median along a flattened version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape and buffer length as the expected output, but the
        type (of the output) will be cast if necessary.

    Returns
    -------
    median : ndarray
        A new array holding the result (unless `out` is specified, in which
        case that array is returned instead).

    Examples
    --------
    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
    >>> a
    array([[10,  7,  4],
           [ 3,  2,  1]])
    >>> median(a)
    3.0
    >>> median(a, axis=1)
    array([ 7.,  2.])

    """
    x = np.array(x, copy=False, order='c', subok=True)
    shape = x.shape
    dtype = np.find_common_type([np.float64, x.dtype], [])
    if dtype != np.float64:
        raise TypeError("Invalid input type '{0}'.".format(dtype))
    x = np.asanyarray(x, dtype)
    if mask is None and hasattr(x, 'mask'):
        mask = x.mask
    if mask is not None and mask.shape != x.shape:
        raise ValueError('Incompatible mask shape.')
    if mask is not None:
        mask = np.array(mask, dtype=bool, order='c', copy=False).view(np.int8)

    if axis is not None:
        slow = product(shape[:axis])
        fast = product(shape[axis+1:])
        x = x.reshape((slow, -1, fast))
        if mask is not None:
            mask = mask.reshape((slow, -1, fast)).view(np.int8)
        if out is not None:
            if out.nbytes != slow * fast * dtype.itemsize:
                raise ValueError('Incompatible output buffer length.')
            if out.shape != shape[:axis] + shape[axis+1:]:
                raise ValueError('Incompatible output shape.')
            out.dtype = dtype
        else:
            out = np.empty(shape[:axis] + shape[axis+1:], dtype)
        out_ = out.reshape((slow, fast))
    else:
        out = np.empty((), dtype)
        out_ = out

    if mask is axis is None:
        tmf.math.median(x.ravel(), out)
    elif axis is None:
        tmf.math.median_mask(x.ravel(), mask.ravel(), out)
    elif mask is None:
        tmf.math.median_axis(x.T, out_.T)
    else:
        tmf.math.median_mask_axis(x.T, mask.T, out_.T)

    if out.ndim == 0:
        out = out.flat[0]
    return out

def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    m, n = a.shape
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    # compute working space of geqrf and ormqr, and solve R
    if dtype == 'f':
        buffersize = cusolver.sgeqrf_bufferSize(handle, m, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        tau = cupy.empty(mn, dtype=numpy.float32)
        cusolver.sgeqrf(
            handle, m, n, x.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:  # dtype == 'd'
        buffersize = cusolver.dgeqrf_bufferSize(handle, m, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        tau = cupy.empty(mn, dtype=numpy.float64)
        cusolver.dgeqrf(
            handle, m, n, x.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # solve Q
    if dtype == 'f':
        buffersize = cusolver.sorgqr_bufferSize(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        cusolver.sorgqr(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:
        buffersize = cusolver.dorgqr_bufferSize(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        cusolver.dorgqr(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)

def __init__(self, expressions, dtype=None, shape=None, fill_value=None):
    self.is_masked = any([e.is_masked for e in expressions])
    self.fill_value = fill_value
    if self.is_masked and fill_value is None:
        for expression in expressions:
            if expression.is_masked:
                try:
                    # fast path
                    self.fill_value = expression[0:1].fill_value
                    break
                except:  # noqa
                    # slower path (we have to evaluate everything)
                    self.fill_value = expression.values.fill_value
                    break
        else:
            raise ValueError(
                'Concatenating expressions with masked values, but no fill value is found')
    if dtype is None:
        dtypes = [e.dtype for e in expressions]

        any_strings = any([is_string_type(dtype) for dtype in dtypes])
        if any_strings:
            self.dtype = pa.string()  # TODO: how do we know it should not be large_string?
        else:
            # np.datetime64/timedelta64 and find_common_type don't mix very well
            if all([dtype == 'datetime64' for dtype in dtypes]):
                self.dtype = dtypes[0]
            elif all([dtype == 'timedelta64' for dtype in dtypes]):
                self.dtype = dtypes[0]
            else:
                if all([dtype == dtypes[0] for dtype in dtypes]):
                    # find_common_type doesn't always behave well
                    self.dtype = dtypes[0]
                if any([dtype.kind in 'SU' for dtype in dtypes]):
                    # strings are also done manually
                    if all([dtype.kind in 'SU' for dtype in dtypes]):
                        index = np.argmax([dtype.itemsize for dtype in dtypes])
                        self.dtype = dtypes[index]
                    else:
                        # NOTE: this branch previously referenced undefined
                        # names (df/dfs); cast each expression to unicode and
                        # take the widest resulting dtype instead.
                        unicode_dtypes = [e.values.astype('O').astype('U').dtype
                                          for e in expressions]
                        index = np.argmax([dt.itemsize for dt in unicode_dtypes])
                        self.dtype = unicode_dtypes[index]
                else:
                    self.dtype = np.find_common_type([k.numpy for k in dtypes], [])
                logger.debug("common type for %r is %r", dtypes, self.dtype)
        # make sure all expressions are the same type
        self.expressions = [e if vaex.array_types.same_type(e.dtype, self.dtype)
                            else e.astype(self.dtype)
                            for e in expressions]
    else:
        # if dtype is given, we assume every expression/column is the same dtype
        self.dtype = dtype
        self.expressions = expressions[:]
    if shape is not None:
        self.shape = (len(self), ) + shape
    else:
        self.shape = (len(self), ) + self.expressions[0].evaluate(
            0, 1, array_type='numpy', parallel=False).shape[1:]
    for i in range(1, len(self.expressions)):
        expression = self.expressions[i]
        shape_i = (len(self), ) + expressions[i].evaluate(
            0, 1, array_type='numpy', parallel=False).shape[1:]
        if self.shape != shape_i:
            raise ValueError(
                "shape of expression %s, array index 0, is %r and is "
                "incompatible with the shape of the same expression at "
                "array index %d, %r"
                % (self.expressions[0], self.shape, i, shape_i))
def merge_disjoint_meshes(meshes, skip_tests=False, single_group=False):
    if not meshes:
        raise ValueError("must pass at least one mesh")

    from pytools import is_single_valued
    if not is_single_valued(mesh.ambient_dim for mesh in meshes):
        raise ValueError("all meshes must share the same ambient dimension")

    # {{{ assemble combined vertex array

    ambient_dim = meshes[0].ambient_dim
    nvertices = sum(mesh.vertices.shape[-1] for mesh in meshes)

    vert_dtype = np.find_common_type(
            [mesh.vertices.dtype for mesh in meshes], [])
    vertices = np.empty((ambient_dim, nvertices), vert_dtype)

    current_vert_base = 0
    vert_bases = []
    for mesh in meshes:
        mesh_nvert = mesh.vertices.shape[-1]
        vertices[:, current_vert_base:current_vert_base+mesh_nvert] = \
                mesh.vertices

        vert_bases.append(current_vert_base)
        current_vert_base += mesh_nvert

    # }}}

    # {{{ assemble new groups list

    nodal_adjacency = None
    facial_adjacency_groups = None

    if single_group:
        grp_cls = None
        order = None
        unit_nodes = None

        for mesh in meshes:
            if mesh._nodal_adjacency is not None:
                nodal_adjacency = False
            if mesh._facial_adjacency_groups is not None:
                facial_adjacency_groups = False

            for group in mesh.groups:
                if grp_cls is None:
                    grp_cls = type(group)
                    order = group.order
                    unit_nodes = group.unit_nodes
                else:
                    assert type(group) == grp_cls
                    assert group.order == order
                    assert np.array_equal(unit_nodes, group.unit_nodes)

        vertex_indices = np.vstack([
            group.vertex_indices + vert_base
            for mesh, vert_base in zip(meshes, vert_bases)
            for group in mesh.groups
            ])
        nodes = np.hstack([
            group.nodes
            for mesh in meshes
            for group in mesh.groups
            ])

        if not nodes.flags.c_contiguous:
            # hstack stopped producing C-contiguous arrays in numpy 1.14
            nodes = nodes.copy(order="C")

        new_groups = [
                grp_cls(order, vertex_indices, nodes, unit_nodes=unit_nodes)
                ]

    else:
        new_groups = []

        for mesh, vert_base in zip(meshes, vert_bases):
            if mesh._nodal_adjacency is not None:
                nodal_adjacency = False
            if mesh._facial_adjacency_groups is not None:
                facial_adjacency_groups = False

            for group in mesh.groups:
                new_vertex_indices = group.vertex_indices + vert_base
                new_group = group.copy(vertex_indices=new_vertex_indices)
                new_groups.append(new_group)

    # }}}

    from meshmode.mesh import Mesh
    return Mesh(
            vertices, new_groups,
            skip_tests=skip_tests,
            nodal_adjacency=nodal_adjacency,
            facial_adjacency_groups=facial_adjacency_groups,
            is_conforming=all(mesh.is_conforming for mesh in meshes))
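The merged vertex array adopts the widest vertex dtype across the input meshes; a minimal check of that promotion (the mesh dtypes here are illustrative):

import numpy as np

vert_dtypes = [np.dtype('float32'), np.dtype('float64')]
print(np.find_common_type(vert_dtypes, []))  # float64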
def _cast_common_type(*xs):
    dtypes = [x.dtype for x in xs if x is not None]
    dtype = numpy.find_common_type(dtypes, [])
    return [x.astype(dtype) if x is not None and x.dtype != dtype else x
            for x in xs]
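A usage sketch for `_cast_common_type`, with plain NumPy arrays standing in for the CuPy arrays it normally receives; `None` placeholders pass through unchanged:

import numpy

a = numpy.arange(3, dtype=numpy.int32)
b = numpy.linspace(0.0, 1.0, 3, dtype=numpy.float32)
a2, b2, c2 = _cast_common_type(a, b, None)
# int32 and float32 promote to float64; the None slot is preserved
print(a2.dtype, b2.dtype, c2)  # float64 float64 None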
def test_scalar_loses2(self):
    res = np.find_common_type(['f4', 'f4'], ['i8'])
    assert_(res == 'f4')
def _get_empty_dtype_and_na(
        join_units: Sequence[JoinUnit]) -> Tuple[DtypeObj, Any]:
    """
    Return dtype and N/A values to use when concatenating specified units.

    Returned N/A value may be None which means there was no casting involved.

    Returns
    -------
    dtype
    na
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        if blk is None:
            return np.dtype(np.float64), np.nan

    if _is_uniform_reindex(join_units):
        # FIXME: integrate property
        empty_dtype = join_units[0].block.dtype
        upcasted_na = join_units[0].block.fill_value
        return empty_dtype, upcasted_na

    has_none_blocks = False
    dtypes = [None] * len(join_units)
    for i, unit in enumerate(join_units):
        if unit.block is None:
            has_none_blocks = True
        else:
            dtypes[i] = unit.dtype

    upcast_classes = _get_upcast_classes(join_units, dtypes)

    # TODO: de-duplicate with maybe_promote?
    # create the result
    if "extension" in upcast_classes:
        if len(upcast_classes) == 1:
            cls = upcast_classes["extension"][0]
            return cls, cls.na_value
        else:
            return np.dtype("object"), np.nan
    elif "object" in upcast_classes:
        return np.dtype(np.object_), np.nan
    elif "bool" in upcast_classes:
        if has_none_blocks:
            return np.dtype(np.object_), np.nan
        else:
            return np.dtype(np.bool_), None
    elif "category" in upcast_classes:
        return np.dtype(np.object_), np.nan
    elif "datetimetz" in upcast_classes:
        # GH-25014. We use NaT instead of iNaT, since this eventually
        # ends up in DatetimeArray.take, which does not allow iNaT.
        dtype = upcast_classes["datetimetz"]
        return dtype[0], NaT
    elif "datetime" in upcast_classes:
        return np.dtype("M8[ns]"), np.datetime64("NaT", "ns")
    elif "timedelta" in upcast_classes:
        return np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")
    else:  # pragma
        try:
            common_dtype = np.find_common_type(upcast_classes, [])
        except TypeError:
            # At least one is an ExtensionArray
            return np.dtype(np.object_), np.nan
        else:
            if is_float_dtype(common_dtype):
                return common_dtype, common_dtype.type(np.nan)
            elif is_numeric_dtype(common_dtype):
                if has_none_blocks:
                    return np.dtype(np.float64), np.nan
                else:
                    return common_dtype, None

    msg = "invalid dtype determination in get_concat_dtype"
    raise AssertionError(msg)
def test_scalar_wins2(self):
    res = np.find_common_type(['u4', 'i4', 'i4'], ['f4'])
    assert_(res == 'f8')
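Taken together, these two tests pin down the rule: scalar types only influence the result when their kind outranks every array type's kind. A compact demonstration:

import numpy as np

# same kind on both sides: the array type wins, precision stays float32
print(np.find_common_type(['f4'], ['i8']))  # float32
# scalar kind (float) outranks array kind (int): full promotion to float64
print(np.find_common_type(['i4'], ['f4']))  # float64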
def c_math_mangler(target, name, arg_dtypes, modify_name=True):
    # Function mangler for math functions defined in the C standard.
    # Converts abs, min, max to fabs, fmin, fmax.
    # If modify_name is set to True, function names are modified according to
    # the floating point types of the arguments (e.g. cos(double), cosf(float)).
    # This should be set to True for C and CUDA, False for OpenCL.
    if not isinstance(name, str):
        return None

    if name in ["abs", "min", "max"]:
        name = "f" + name

    # unary functions
    if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin",
                 "sinh", "tanh", "exp", "log", "log10", "sqrt", "ceil",
                 "floor"]
            and len(arg_dtypes) == 1
            and arg_dtypes[0].numpy_dtype.kind == "f"):

        dtype = arg_dtypes[0].numpy_dtype

        if modify_name:
            if dtype == np.float64:
                pass  # fabs
            elif dtype == np.float32:
                name = name + "f"  # fabsf
            elif dtype == np.float128:  # pylint:disable=no-member
                name = name + "l"  # fabsl
            else:
                raise LoopyTypeError("%s does not support type %s"
                                     % (name, dtype))

        return CallMangleInfo(
                target_name=name,
                result_dtypes=arg_dtypes,
                arg_dtypes=arg_dtypes)

    # binary functions
    if name in ["fmax", "fmin", "copysign"] and len(arg_dtypes) == 2:
        dtype = np.find_common_type(
                [], [dtype.numpy_dtype for dtype in arg_dtypes])

        if dtype.kind == "c":
            raise LoopyTypeError("%s does not support complex numbers" % name)
        elif dtype.kind == "f":
            if modify_name:
                if dtype == np.float64:
                    pass  # fmin
                elif dtype == np.float32:
                    name = name + "f"  # fminf
                elif dtype == np.float128:  # pylint:disable=no-member
                    name = name + "l"  # fminl
                else:
                    raise LoopyTypeError("%s does not support type %s"
                                         % (name, dtype))

            result_dtype = NumpyType(dtype)
            return CallMangleInfo(
                    target_name=name,
                    result_dtypes=(result_dtype,),
                    arg_dtypes=2*(result_dtype,))

    return None
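In the binary branch, both argument dtypes are passed as scalar types (the array-type list is empty), so mixed float32/float64 arguments promote to float64 and the unsuffixed double-precision name is kept. A standalone check:

import numpy as np

dtype = np.find_common_type([], [np.dtype('f4'), np.dtype('f8')])
print(dtype)  # float64 -> fmin/fmax keep their unsuffixed names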
def _get_empty_dtype_and_na(join_units):
    """
    Return dtype and N/A values to use when concatenating specified units.

    Returned N/A value may be None which means there was no casting involved.

    Returns
    -------
    dtype
    na
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        if blk is None:
            return np.float64, np.nan

    if _is_uniform_reindex(join_units):
        # FIXME: integrate property
        empty_dtype = join_units[0].block.dtype
        upcasted_na = join_units[0].block.fill_value
        return empty_dtype, upcasted_na

    has_none_blocks = False
    dtypes = [None] * len(join_units)
    for i, unit in enumerate(join_units):
        if unit.block is None:
            has_none_blocks = True
        else:
            dtypes[i] = unit.dtype

    upcast_classes = defaultdict(list)
    null_upcast_classes = defaultdict(list)
    for dtype, unit in zip(dtypes, join_units):
        if dtype is None:
            continue

        if is_categorical_dtype(dtype):
            upcast_cls = "category"
        elif is_datetime64tz_dtype(dtype):
            upcast_cls = "datetimetz"
        elif issubclass(dtype.type, np.bool_):
            upcast_cls = "bool"
        elif issubclass(dtype.type, np.object_):
            upcast_cls = "object"
        elif is_datetime64_dtype(dtype):
            upcast_cls = "datetime"
        elif is_timedelta64_dtype(dtype):
            upcast_cls = "timedelta"
        elif is_sparse(dtype):
            upcast_cls = dtype.subtype.name
        elif is_extension_array_dtype(dtype):
            upcast_cls = "object"
        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
            upcast_cls = dtype.name
        else:
            upcast_cls = "float"

        # Null blocks should not influence upcast class selection, unless
        # there are only null blocks, when the same upcasting rules must be
        # applied to the null upcast classes.
        if unit.is_na:
            null_upcast_classes[upcast_cls].append(dtype)
        else:
            upcast_classes[upcast_cls].append(dtype)

    if not upcast_classes:
        upcast_classes = null_upcast_classes

    # TODO: de-duplicate with maybe_promote?
    # create the result
    if "object" in upcast_classes:
        return np.dtype(np.object_), np.nan
    elif "bool" in upcast_classes:
        if has_none_blocks:
            return np.dtype(np.object_), np.nan
        else:
            return np.dtype(np.bool_), None
    elif "category" in upcast_classes:
        return np.dtype(np.object_), np.nan
    elif "datetimetz" in upcast_classes:
        # GH-25014. We use NaT instead of iNaT, since this eventually
        # ends up in DatetimeArray.take, which does not allow iNaT.
        dtype = upcast_classes["datetimetz"]
        return dtype[0], NaT
    elif "datetime" in upcast_classes:
        return np.dtype("M8[ns]"), np.datetime64("NaT", "ns")
    elif "timedelta" in upcast_classes:
        return np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")
    else:  # pragma
        try:
            common_dtype = np.find_common_type(upcast_classes, [])
        except TypeError:
            # At least one is an ExtensionArray
            return np.dtype(np.object_), np.nan
        else:
            if is_float_dtype(common_dtype):
                return common_dtype, common_dtype.type(np.nan)
            elif is_numeric_dtype(common_dtype):
                if has_none_blocks:
                    return np.float64, np.nan
                else:
                    return common_dtype, None

    msg = "invalid dtype determination in get_concat_dtype"
    raise AssertionError(msg)
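Note that the fallback at the bottom passes `upcast_classes` itself, so iteration yields the dict *keys*, which here are dtype name strings; `find_common_type` accepts those directly:

import numpy as np

print(np.find_common_type(['int64', 'float32'], []))  # float64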
def _get_dtype(operators, dtypes=None):
    # Avoid a mutable default argument: a shared default list would
    # accumulate dtypes across calls.
    if dtypes is None:
        dtypes = []
    for obj in operators:
        if obj is not None and hasattr(obj, 'dtype'):
            dtypes.append(obj.dtype)
    return np.find_common_type(dtypes, [])
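A usage sketch with a hypothetical stand-in operator class (anything exposing a `dtype` attribute works); `None` entries are skipped:

import numpy as np

class _Op:  # hypothetical: only a .dtype attribute is needed
    def __init__(self, dtype):
        self.dtype = np.dtype(dtype)

print(_get_dtype([_Op('f4'), None, _Op('i8')]))  # float64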
def block_diag(*arrs):
    """
    [Rewrite of scipy.linalg.block_diag]
    Create a block diagonal matrix from provided arrays.

    Given the inputs `A`, `B` and `C`, the output will have these
    arrays arranged on the diagonal::

        [[A, 0, 0],
         [0, B, 0],
         [0, 0, C]]

    Parameters
    ----------
    A, B, C, ... : array_like, up to 2-D
        Input arrays. A 1-D array or array_like sequence of length `n` is
        treated as a 2-D array with shape ``(1,n)``.

    Returns
    -------
    D : ndarray
        Array with `A`, `B`, `C`, ... on the diagonal. `D` has the common
        dtype of the inputs (via ``np.find_common_type``).

    Notes
    -----
    If all the input arrays are square, the output is known as a
    block diagonal matrix.

    Empty sequences (i.e., array-likes of zero size) are ignored.

    Examples
    --------
    >>> from scipy.linalg import block_diag
    >>> A = [[1, 0],
    ...      [0, 1]]
    >>> B = [[3, 4, 5],
    ...      [6, 7, 8]]
    >>> C = [[7]]
    >>> block_diag(A, B, C)
    array([[1, 0, 0, 0, 0, 0],
           [0, 1, 0, 0, 0, 0],
           [0, 0, 3, 4, 5, 0],
           [0, 0, 6, 7, 8, 0],
           [0, 0, 0, 0, 0, 7]])
    >>> block_diag(1.0, [2, 3], [[4, 5], [6, 7]])
    array([[ 1.,  0.,  0.,  0.,  0.],
           [ 0.,  2.,  3.,  0.,  0.],
           [ 0.,  0.,  0.,  4.,  5.],
           [ 0.,  0.,  0.,  6.,  7.]])

    """
    if arrs == ():
        arrs = ([],)
    arrs = [np.atleast_2d(a) for a in arrs]

    bad_args = [k for k in range(len(arrs)) if arrs[k].ndim > 2]
    if bad_args:
        raise ValueError("arguments in the following positions have dimension "
                         "greater than 2: %s" % bad_args)

    shapes = np.array([a.shape for a in arrs])
    out_dtype = np.find_common_type([arr.dtype for arr in arrs], [])
    out = np.zeros(np.sum(shapes, axis=0), dtype=out_dtype)

    r, c = 0, 0
    for i, (rr, cc) in enumerate(shapes):
        out[r:r + rr, c:c + cc] = arrs[i]
        r += rr
        c += cc
    return out
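Unlike the SciPy original, the output dtype here is the common type of all blocks rather than the dtype of `A` alone; mixing integer and float blocks therefore yields a float result:

import numpy as np

D = block_diag(np.eye(2, dtype=np.float32),
               np.array([[1, 2]], dtype=np.int64))
print(D.dtype)  # float64: common type of float32 and int64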
def merge_coolers(output_uri, input_uris, mergebuf, columns=None, dtypes=None,
                  agg=None, **kwargs):
    """
    Merge multiple coolers with identical axes.

    The merged cooler is stored at ``output_uri``.

    .. versionadded:: 0.8.0

    Parameters
    ----------
    output_uri : str
        Output cooler file path or URI.
    input_uris : list of str
        List of input file paths or URIs of coolers to combine.
    mergebuf : int
        Maximum number of pixels processed at a time.
    columns : list of str, optional
        Specify which pixel value columns to include in the aggregation.
        Default is to aggregate only the 'count' column.
    dtypes : dict, optional
        Specific dtypes to use for value columns. Default is to propagate
        the current dtypes of the value columns.
    agg : dict, optional
        Functions to use for aggregating each value column. Pass the same
        kind of dict accepted by ``pandas.DataFrame.groupby.agg``. Default
        is to apply 'sum' to every value column.
    kwargs
        Passed to ``cooler.create``.

    Notes
    -----
    The default output file mode is 'w'. If appending output to an existing
    file, pass `mode='a'`.

    See also
    --------
    cooler.coarsen_cooler
    cooler.zoomify_cooler

    """
    # TODO: combine metadata from inputs
    from .api import Cooler

    logger.info("Merging:\n{}".format("\n".join(input_uris)))

    clrs = [Cooler(path) for path in input_uris]

    is_symm = [clr.storage_mode == u"symmetric-upper" for clr in clrs]
    if all(is_symm):
        symmetric_upper = True
    elif not any(is_symm):
        symmetric_upper = False
    else:
        # a bare ``ValueError(...)`` here would construct the exception
        # without raising it
        raise ValueError("Cannot merge symmetric and non-symmetric coolers.")

    if columns is None:
        columns = ["count"]

    dtype_map = defaultdict(list)
    for clr in clrs:
        pixel_dtypes = clr.pixels().dtypes
        for col in columns:
            if col not in pixel_dtypes:
                raise ValueError(
                    "Pixel value column '{}' not found in "
                    "input '{}'.".format(col, clr.filename))
            else:
                dtype_map[col].append(pixel_dtypes[col])

    if dtypes is None:
        dtypes = {}
    for col in columns:
        if col not in dtypes:
            dtypes[col] = np.find_common_type(dtype_map[col], [])

    bins = clrs[0].bins()[["chrom", "start", "end"]][:]
    assembly = clrs[0].info.get("genome-assembly", None)

    iterator = CoolerMerger(clrs, maxbuf=mergebuf, columns=columns, agg=agg)

    create(
        output_uri,
        bins,
        iterator,
        columns=columns,
        dtypes=dtypes,
        assembly=assembly,
        symmetric_upper=symmetric_upper,
        **kwargs
    )
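A minimal sketch of the per-column dtype resolution above, with one input storing 'count' as int32 and another as int64 (the dtype values are illustrative):

import numpy as np
from collections import defaultdict

dtype_map = defaultdict(list)
dtype_map['count'] = [np.dtype('int32'), np.dtype('int64')]
dtypes = {col: np.find_common_type(dts, []) for col, dts in dtype_map.items()}
print(dtypes)  # {'count': dtype('int64')}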
def inverse_transform(self, X):
    """Convert the data back to the original representation.

    In case unknown categories are encountered (all zeros in the
    one-hot encoding), ``None`` is used to represent this category.

    Parameters
    ----------
    X : array-like or sparse matrix, shape [n_samples, n_encoded_features]
        The transformed data.

    Returns
    -------
    X_tr : array-like, shape [n_samples, n_features]
        Inverse transformed array.
    """
    check_is_fitted(self, 'categories_')
    X = check_array(X, accept_sparse='csr')

    n_samples, _ = X.shape
    n_features = len(self.categories_)
    if self.drop is None:
        n_transformed_features = sum(len(cats) for cats in self.categories_)
    else:
        n_transformed_features = sum(len(cats) - 1
                                     for cats in self.categories_)

    # validate shape of passed X
    msg = ("Shape of the passed X data is not correct. Expected {0} "
           "columns, got {1}.")
    if X.shape[1] != n_transformed_features:
        raise ValueError(msg.format(n_transformed_features, X.shape[1]))

    # create resulting array of appropriate dtype
    dt = np.find_common_type([cat.dtype for cat in self.categories_], [])
    X_tr = np.empty((n_samples, n_features), dtype=dt)

    j = 0
    found_unknown = {}

    for i in range(n_features):
        if self.drop is None:
            cats = self.categories_[i]
        else:
            cats = np.delete(self.categories_[i], self.drop_idx_[i])
        n_categories = len(cats)

        # Only happens if there was a column with a unique
        # category. In this case we just fill the column with this
        # unique category value.
        if n_categories == 0:
            X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]
            j += n_categories
            continue
        sub = X[:, j:j + n_categories]
        # for sparse X, argmax returns a 2-D matrix; ensure a 1-D array
        labels = np.asarray(_argmax(sub, axis=1)).flatten()
        X_tr[:, i] = cats[labels]
        if self.handle_unknown == 'ignore':
            unknown = np.asarray(sub.sum(axis=1) == 0).flatten()
            # ignored unknown categories: we have a row of all zeros
            if unknown.any():
                found_unknown[i] = unknown
        # drop will either be None or handle_unknown will be error. If
        # self.drop is not None, then we can safely assume that all of
        # the nulls in each column are the dropped value
        elif self.drop is not None:
            dropped = np.asarray(sub.sum(axis=1) == 0).flatten()
            if dropped.any():
                X_tr[dropped, i] = self.categories_[i][self.drop_idx_[i]]

        j += n_categories

    # if ignored categories were found: potentially need to upcast the
    # result to insert None values
    if found_unknown:
        if X_tr.dtype != object:
            X_tr = X_tr.astype(object)

        for idx, mask in found_unknown.items():
            X_tr[mask, idx] = None

    return X_tr
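The `dt` computed above falls back to object as soon as string and numeric categories are mixed, which is what allows `None` markers to be inserted later without a second upcast. A sketch:

import numpy as np

categories_ = [np.array(['a', 'b']), np.array([1, 2, 3])]
dt = np.find_common_type([cat.dtype for cat in categories_], [])
print(dt)  # object: no common numeric type for '<U1' and int64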
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is a one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.
            If ``compute_uv`` is ``False``, only ``s`` is returned.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32 or float64
    a_dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char
    if a_dtype == 'f':
        s_dtype = 'f'
    elif a_dtype == 'd':
        s_dtype = 'd'
    elif a_dtype == 'F':
        s_dtype = 'f'
    else:  # a_dtype == 'D'
        a_dtype = 'D'
        s_dtype = 'd'

    # Remark 1: gesvd only supports m >= n
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 no longer applies since CUDA 8.0
    n, m = a.shape

    # `a` must be copied because xgesvd destroys the matrix
    if m >= n:
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        m, n = a.shape
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            vt = cupy.empty((n, n), dtype=a_dtype)
        else:
            u = cupy.empty((mn, m), dtype=a_dtype)
            vt = cupy.empty((mn, n), dtype=a_dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr

    s = cupy.empty(mn, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if compute_uv:
        job = ord('A') if full_matrices else ord('S')
    else:
        job = ord('N')

    if a_dtype == 'f':
        buffersize = cusolver.sgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.sgesvd(
            handle, job, job, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
            vt_ptr, n, workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    elif a_dtype == 'd':
        buffersize = cusolver.dgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.dgesvd(
            handle, job, job, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
            vt_ptr, n, workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    elif a_dtype == 'F':
        buffersize = cusolver.cgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.cgesvd(
            handle, job, job, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
            vt_ptr, n, workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    else:  # a_dtype == 'D'
        buffersize = cusolver.zgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.zgesvd(
            handle, job, job, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
            vt_ptr, n, workspace.data.ptr, buffersize, 0, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError('SVD computation does not converge')
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    # Note that the returned arrays may need to be transposed
    # depending on the structure of the input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
def cuda_with_types(self, arg_id_to_dtype, callables_table):

    name = self.name

    if name in _CUDA_SPECIFIC_FUNCTIONS:
        num_args = _CUDA_SPECIFIC_FUNCTIONS[name]

        # {{{ sanity checks

        for id, dtype in arg_id_to_dtype.items():
            if not -1 <= id < num_args:
                raise LoopyError("%s can take only %d arguments."
                                 % (name, num_args))

            if dtype is not None and dtype.kind == "c":
                raise LoopyTypeError(
                    f"'{name}' does not support complex arguments.")

        # }}}

        for i in range(num_args):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

        dtype = np.find_common_type([], [
            dtype.numpy_dtype for id, dtype in arg_id_to_dtype.items()
            if id >= 0])

        updated_arg_id_to_dtype = {
            id: NumpyType(dtype) for id in range(-1, num_args)}

        return (self.copy(name_in_target=name,
                          arg_id_to_dtype=updated_arg_id_to_dtype),
                callables_table)

    if name == "dot":
        # CUDA dot function:
        # performs a dot product on vector inputs, returning a scalar.
        for i in range(2):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

        input_dtype = arg_id_to_dtype[0]

        scalar_dtype, offset, field_name = input_dtype.fields["x"]
        return_dtype = scalar_dtype
        # return the callables table alongside the updated callable,
        # for consistency with the other branches
        return (self.copy(arg_id_to_dtype={0: input_dtype, 1: input_dtype,
                                           -1: return_dtype}),
                callables_table)

    return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
            callables_table)
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if a.ndim >= 3:
        return _batched_inv(a)

    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    # cast (and copy) so the LU factorization below cannot overwrite the
    # caller's array and always operates on the dtype selected above
    a = a.astype(dtype, copy=True)

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    ipiv = cupy.empty((a.shape[0], 1), dtype=numpy.intc)

    if dtype == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrs = cusolver.sgetrs
    elif dtype == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrs = cusolver.dgetrs
    elif dtype == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrs = cusolver.cgetrs
    elif dtype == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrs = cusolver.zgetrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    m = a.shape[0]

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr,
          b.data.ptr, m, dev_info.data.ptr)

    return b