def test_pandas_dtypes(self):
    """Expect TypeError when resolving a lone pandas extension dtype."""
    # TODO: not implemented yet
    # Both extension dtypes are exercised the same way: the whole
    # comparison is expected to blow up with TypeError.
    for dtype_cls in (CategoricalDtype, DatetimeTZDtype):
        with self.assertRaises(TypeError):
            self.assertEqual(_find_common_type([dtype_cls()]), dtype_cls)
def test_period_dtype(self):
    """Check common-type resolution involving a daily PeriodDtype.

    Two identical period dtypes are expected to resolve to 'period[D]';
    pairing the period dtype with anything else (in either order) is
    expected to resolve to ``object``.
    """
    dtype = PeriodDtype(freq='D')
    self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]')

    # ``object`` is spelled with the builtin rather than the ``np.object``
    # alias: the alias was only ever a synonym for the builtin and is no
    # longer available in recent NumPy releases.
    others = [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
              PeriodDtype(freq='2D'),
              PeriodDtype(freq='H'),
              np.dtype('datetime64[ns]'),
              object,
              np.int64]
    for dtype2 in others:
        # Argument order must not matter.
        self.assertEqual(_find_common_type([dtype, dtype2]), object)
        self.assertEqual(_find_common_type([dtype2, dtype]), object)
def test_datetimetz_dtype(self):
    """Check common-type resolution involving a tz-aware datetime dtype.

    Two identical US/Eastern dtypes are expected to resolve to
    'datetime64[ns, US/Eastern]'; pairing with a different timezone, a
    naive datetime64, object or int64 (in either order) is expected to
    resolve to ``object``.
    """
    dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
    self.assertEqual(_find_common_type([dtype, dtype]),
                     'datetime64[ns, US/Eastern]')

    # Builtin ``object`` replaces the ``np.object`` alias, which was a
    # plain synonym for it and is gone from recent NumPy releases.
    others = [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
              np.dtype('datetime64[ns]'),
              object,
              np.int64]
    for dtype2 in others:
        # Argument order must not matter.
        self.assertEqual(_find_common_type([dtype, dtype2]), object)
        self.assertEqual(_find_common_type([dtype2, dtype]), object)
def test_period_dtype(self):
    """Check common-type resolution involving a daily PeriodDtype.

    Two identical period dtypes are expected to resolve to 'period[D]';
    pairing the period dtype with anything else (in either order) is
    expected to resolve to ``object``.
    """
    dtype = PeriodDtype(freq='D')
    self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]')

    # ``object`` is spelled with the builtin rather than the ``np.object``
    # alias: the alias was only ever a synonym for the builtin and is no
    # longer available in recent NumPy releases.
    others = [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
              PeriodDtype(freq='2D'),
              PeriodDtype(freq='H'),
              np.dtype('datetime64[ns]'),
              object,
              np.int64]
    for dtype2 in others:
        # Argument order must not matter.
        self.assertEqual(_find_common_type([dtype, dtype2]), object)
        self.assertEqual(_find_common_type([dtype2, dtype]), object)
def test_datetimetz_dtype(self):
    """Check common-type resolution involving a tz-aware datetime dtype.

    Two identical US/Eastern dtypes are expected to resolve to
    'datetime64[ns, US/Eastern]'; pairing with a different timezone, a
    naive datetime64, object or int64 (in either order) is expected to
    resolve to ``object``.
    """
    dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
    self.assertEqual(_find_common_type([dtype, dtype]),
                     'datetime64[ns, US/Eastern]')

    # Builtin ``object`` replaces the ``np.object`` alias, which was a
    # plain synonym for it and is gone from recent NumPy releases.
    others = [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
              np.dtype('datetime64[ns]'),
              object,
              np.int64]
    for dtype2 in others:
        # Argument order must not matter.
        self.assertEqual(_find_common_type([dtype, dtype2]), object)
        self.assertEqual(_find_common_type([dtype2, dtype]), object)
def test_numpy_dtypes(self):
    """Check the expected common type for combinations of NumPy dtypes.

    Each case pairs a tuple of source types with the type that
    ``_find_common_type`` is expected to return for them.
    """
    # Builtin ``object`` is used instead of the ``np.object`` alias: the
    # alias was a synonym for the builtin and has been removed from
    # recent NumPy releases.
    # (source_types, destination_type)
    testcases = (
        # identity
        ((np.int64,), np.int64),
        ((np.uint64,), np.uint64),
        ((np.float32,), np.float32),
        ((object,), object),

        # into ints
        ((np.int16, np.int64), np.int64),
        ((np.int32, np.uint32), np.int64),
        ((np.uint16, np.uint64), np.uint64),

        # into floats
        ((np.float16, np.float32), np.float32),
        ((np.float16, np.int16), np.float32),
        ((np.float32, np.int16), np.float32),
        ((np.uint64, np.int64), np.float64),
        ((np.int16, np.float64), np.float64),
        ((np.float16, np.int64), np.float64),

        # into others
        ((np.complex128, np.int32), np.complex128),
        ((object, np.float32), object),
        ((object, np.int16), object),
    )
    for src, common in testcases:
        self.assertEqual(_find_common_type(src), common)
def test_numpy_dtypes(self):
    """Check the expected common type for combinations of NumPy dtypes.

    Each case pairs a tuple of source types with the type that
    ``_find_common_type`` is expected to return for them.  An empty
    input is expected to raise ValueError.
    """
    # Builtin ``object`` is used instead of the ``np.object`` alias: the
    # alias was a synonym for the builtin and has been removed from
    # recent NumPy releases.
    dt_ns = np.dtype('datetime64[ns]')
    dt_ms = np.dtype('datetime64[ms]')
    td_ns = np.dtype('timedelta64[ns]')
    td_ms = np.dtype('timedelta64[ms]')

    # (source_types, destination_type)
    testcases = (
        # identity
        ((np.int64,), np.int64),
        ((np.uint64,), np.uint64),
        ((np.float32,), np.float32),
        ((object,), object),

        # into ints
        ((np.int16, np.int64), np.int64),
        ((np.int32, np.uint32), np.int64),
        ((np.uint16, np.uint64), np.uint64),

        # into floats
        ((np.float16, np.float32), np.float32),
        ((np.float16, np.int16), np.float32),
        ((np.float32, np.int16), np.float32),
        ((np.uint64, np.int64), np.float64),
        ((np.int16, np.float64), np.float64),
        ((np.float16, np.int64), np.float64),

        # into others
        ((np.complex128, np.int32), np.complex128),
        ((object, np.float32), object),
        ((object, np.int16), object),

        # datetime / timedelta
        ((dt_ns, dt_ns), dt_ns),
        ((td_ns, td_ns), td_ns),
        ((dt_ns, dt_ms), dt_ns),
        ((td_ms, td_ns), td_ns),
        ((dt_ns, td_ns), object),
        ((dt_ns, np.int64), object),
    )
    for src, common in testcases:
        self.assertEqual(_find_common_type(src), common)

    with tm.assertRaises(ValueError):
        # empty
        _find_common_type([])
def test_numpy_dtypes(self):
    """Check the expected common type for combinations of NumPy dtypes.

    Each case pairs a tuple of source types with the type that
    ``_find_common_type`` is expected to return for them.  An empty
    input is expected to raise ValueError.
    """
    # Builtin ``object`` is used instead of the ``np.object`` alias: the
    # alias was a synonym for the builtin and has been removed from
    # recent NumPy releases.
    dt_ns = np.dtype('datetime64[ns]')
    dt_ms = np.dtype('datetime64[ms]')
    td_ns = np.dtype('timedelta64[ns]')
    td_ms = np.dtype('timedelta64[ms]')

    # (source_types, destination_type)
    testcases = (
        # identity
        ((np.int64, ), np.int64),
        ((np.uint64, ), np.uint64),
        ((np.float32, ), np.float32),
        ((object, ), object),

        # into ints
        ((np.int16, np.int64), np.int64),
        ((np.int32, np.uint32), np.int64),
        ((np.uint16, np.uint64), np.uint64),

        # into floats
        ((np.float16, np.float32), np.float32),
        ((np.float16, np.int16), np.float32),
        ((np.float32, np.int16), np.float32),
        ((np.uint64, np.int64), np.float64),
        ((np.int16, np.float64), np.float64),
        ((np.float16, np.int64), np.float64),

        # into others
        ((np.complex128, np.int32), np.complex128),
        ((object, np.float32), object),
        ((object, np.int16), object),

        # datetime / timedelta
        ((dt_ns, dt_ns), dt_ns),
        ((td_ns, td_ns), td_ns),
        ((dt_ns, dt_ms), dt_ns),
        ((td_ms, td_ns), td_ns),
        ((dt_ns, td_ns), object),
        ((dt_ns, np.int64), object),
    )
    for src, common in testcases:
        self.assertEqual(_find_common_type(src), common)

    with tm.assertRaises(ValueError):
        # empty
        _find_common_type([])
def test_pandas_dtypes(self):
    """Check common-type resolution for pandas extension dtypes.

    Covers CategoricalDtype (alone, paired with itself, and paired with
    object) and a tz-aware DatetimeTZDtype (paired with itself and with
    several incompatible dtypes, in both argument orders).
    """
    # Builtin ``object`` replaces the ``np.object`` alias, which was a
    # plain synonym for it and is gone from recent NumPy releases.
    dtype = CategoricalDtype()
    self.assertEqual(_find_common_type([dtype]), 'category')
    self.assertEqual(_find_common_type([dtype, dtype]), 'category')
    self.assertEqual(_find_common_type([object, dtype]), object)

    dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
    self.assertEqual(_find_common_type([dtype, dtype]),
                     'datetime64[ns, US/Eastern]')

    others = [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
              np.dtype('datetime64[ns]'),
              object,
              np.int64]
    for dtype2 in others:
        # Argument order must not matter.
        self.assertEqual(_find_common_type([dtype, dtype2]), object)
        self.assertEqual(_find_common_type([dtype2, dtype]), object)
def to_coo(self):
    """
    Return the contents of the frame as a sparse SciPy COO matrix.

    .. versionadded:: 0.20.0

    Returns
    -------
    coo_matrix : scipy.sparse.spmatrix
        If the caller is heterogeneous and contains booleans or objects,
        the result will be of dtype=object. See Notes.

    Raises
    ------
    ImportError
        If scipy cannot be imported.

    Notes
    -----
    The values of every column are cast to a single dtype obtained from
    the frame's column dtypes (implicit upcasting to the
    lowest-common-denominator type); when dtypes are mixed, even among
    numeric types, the one that accommodates all of them is chosen.

    e.g. If the dtypes are float16 and float32, dtype will be upcast to
    float32. By numpy.find_common_type convention, mixing int64 and
    uint64 will result in a float64 dtype.
    """
    try:
        from scipy.sparse import coo_matrix
    except ImportError:
        raise ImportError('Scipy is not installed')

    common_dtype = _find_common_type(self.dtypes)

    # One chunk per column: the row positions of its stored values, a
    # matching run of the column position, and the stored values cast to
    # the common dtype.
    row_chunks = []
    col_chunks = []
    value_chunks = []
    for position, label in enumerate(self):
        column = self[label]
        stored_rows = column.sp_index.to_int_index().indices
        col_chunks.append(np.repeat(position, len(stored_rows)))
        row_chunks.append(stored_rows)
        value_chunks.append(
            column.sp_values.astype(common_dtype, copy=False))

    col_idx = np.concatenate(col_chunks)
    row_idx = np.concatenate(row_chunks)
    values = np.concatenate(value_chunks)
    return coo_matrix((values, (row_idx, col_idx)), shape=self.shape)
def na_op(x, y):
    """Apply the enclosing scope's ``op`` to ``x`` and ``y``.

    ``expressions.evaluate`` is tried first.  If it raises TypeError the
    operation is instead applied only at the positions reported as
    non-null, into a freshly allocated result array.

    ``op``, ``str_rep``, ``eval_kwargs``, ``name`` and ``fill_zeros`` are
    taken from the enclosing scope.

    Raises
    ------
    TypeError
        If the first attempt fails and neither ``y`` is an
        ndarray/Series/Index nor ``x`` is an ndarray.
    """
    try:
        result = expressions.evaluate(op, str_rep, x, y,
                                      raise_on_error=True, **eval_kwargs)
    except TypeError:
        y_is_arraylike = isinstance(y, (np.ndarray, ABCSeries, pd.Index))
        if not y_is_arraylike and not isinstance(x, np.ndarray):
            raise TypeError("{typ} cannot perform the operation "
                            "{op}".format(typ=type(x).__name__,
                                          op=str_rep))

        if y_is_arraylike:
            # Both operands carry a dtype: allocate with their common
            # dtype and only operate where both sides are non-null.
            dtype = _find_common_type([x.dtype, y.dtype])
            result = np.empty(x.size, dtype=dtype)
            mask = notnull(x) & notnull(y)
            result[mask] = op(x[mask], _values_from_object(y[mask]))
        else:
            # ``y`` is used as-is; only ``x`` decides which slots are
            # computed.
            result = np.empty(len(x), dtype=x.dtype)
            mask = notnull(x)
            result[mask] = op(x[mask], y)

        # The slots skipped above still hold uninitialised memory; hand
        # them (the inverted mask) and NaN to the putmask helper.
        result, _ = _maybe_upcast_putmask(result, ~mask, np.nan)

    return missing.fill_zeros(result, x, y, name, fill_zeros)
def _sparse_array_op(left, right, op, name, series=False):
    """Apply a binary operation to two sparse operands.

    Parameters
    ----------
    left, right : sparse array-like
        Operands exposing ``dtype``, ``values``, ``sp_values``,
        ``sp_index``, ``fill_value``, ``astype`` and ``get_values``.
    op : callable
        Binary function applied directly to the values when one operand
        has no gaps or when both operands share the same sparse index.
    name : str
        Operation name.  When the operands have different sparse indexes
        it selects the ``splib`` routine ``sparse_<name>_<dtype>``; a
        leading ``'r'`` there means the operands are swapped and the
        prefix is dropped.
    series : bool, default False
        If True and both operands are integer typed, floordiv/mod style
        operations with a zero on the dividing side are computed in
        float64.

    Returns
    -------
    The value returned by ``_wrap_result`` for the computed values,
    sparse index and fill value.
    """
    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        zero_divisor = (
            (name in ('floordiv', 'mod') and (right.values == 0).any()) or
            (name in ('rfloordiv', 'rmod') and (left.values == 0).any()))
        if zero_divisor:
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = _find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        # At least one operand has no gaps: operate on the full values
        # and reuse the index of a gap-free operand.
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        # Same sparse layout: the stored values line up one to one.
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name.startswith('r'):
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            # Builtin ``bool`` is what the ``np.bool`` alias pointed to;
            # the alias itself is gone from recent NumPy releases.
            result_dtype = bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)

        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(
                left_sp_values, left.sp_index, left.fill_value,
                right_sp_values, right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
def _sparse_array_op(left, right, op, name, series=False):
    """Apply a binary operation to two sparse operands.

    Parameters
    ----------
    left, right : sparse array-like
        Operands exposing ``dtype``, ``values``, ``sp_values``,
        ``sp_index``, ``fill_value``, ``astype`` and ``get_values``.
    op : callable
        Binary function applied directly to the values when one operand
        has no gaps or when both operands share the same sparse index.
    name : str
        Operation name.  When the operands have different sparse indexes
        it selects the ``splib`` routine ``sparse_<name>_<dtype>``; a
        leading ``'r'`` there means the operands are swapped and the
        prefix is dropped.
    series : bool, default False
        If True and both operands are integer typed, floordiv/mod style
        operations with a zero on the dividing side are computed in
        float64.

    Returns
    -------
    The value returned by ``_wrap_result`` for the computed values,
    sparse index and fill value.
    """
    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        zero_divisor = (
            (name in ('floordiv', 'mod') and (right.values == 0).any()) or
            (name in ('rfloordiv', 'rmod') and (left.values == 0).any()))
        if zero_divisor:
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = _find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        # At least one operand has no gaps: operate on the full values
        # and reuse the index of a gap-free operand.
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        # Same sparse layout: the stored values line up one to one.
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name.startswith('r'):
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            # Builtin ``bool`` is what the ``np.bool`` alias pointed to;
            # the alias itself is gone from recent NumPy releases.
            result_dtype = bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)

        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(
                left_sp_values, left.sp_index, left.fill_value,
                right_sp_values, right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
def test_categorical_dtype(self):
    """Check common-type resolution involving CategoricalDtype.

    A categorical dtype alone or paired with itself is expected to
    resolve to 'category'; paired with object it is expected to resolve
    to ``object``.
    """
    dtype = CategoricalDtype()
    self.assertEqual(_find_common_type([dtype]), 'category')
    self.assertEqual(_find_common_type([dtype, dtype]), 'category')
    # Builtin ``object`` replaces the ``np.object`` alias, which was a
    # plain synonym for it and is gone from recent NumPy releases.
    self.assertEqual(_find_common_type([object, dtype]), object)
def test_categorical_dtype(self):
    """Check common-type resolution involving CategoricalDtype.

    A categorical dtype alone or paired with itself is expected to
    resolve to 'category'; paired with object it is expected to resolve
    to ``object``.
    """
    dtype = CategoricalDtype()
    self.assertEqual(_find_common_type([dtype]), 'category')
    self.assertEqual(_find_common_type([dtype, dtype]), 'category')
    # Builtin ``object`` replaces the ``np.object`` alias, which was a
    # plain synonym for it and is gone from recent NumPy releases.
    self.assertEqual(_find_common_type([object, dtype]), object)