def test_downcast_conv(): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') assert (np.array_equal(result, arr)) arr = np.array([8., 8., 8., 8., 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) arr = np.array([8., 8., 8., 8., 9.0000000000005]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) # conversions expected = np.array([1,2]) for dtype in [np.float64,object,np.int64]: arr = np.array([1.0,2.0],dtype=dtype) result = com._possibly_downcast_to_dtype(arr,'infer') tm.assert_almost_equal(result, expected) expected = np.array([1.0,2.0,np.nan]) for dtype in [np.float64,object]: arr = np.array([1.0,2.0,np.nan],dtype=dtype) result = com._possibly_downcast_to_dtype(arr,'infer') tm.assert_almost_equal(result, expected)
def test_downcast_conv(): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') assert (np.array_equal(result, arr)) arr = np.array([8., 8., 8., 8., 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) arr = np.array([8., 8., 8., 8., 9.0000000000005]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) # conversions expected = np.array([1, 2]) for dtype in [np.float64, object, np.int64]: arr = np.array([1.0, 2.0], dtype=dtype) result = com._possibly_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected) expected = np.array([1.0, 2.0, np.nan]) for dtype in [np.float64, object]: arr = np.array([1.0, 2.0, np.nan], dtype=dtype) result = com._possibly_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected)
def downcast(self, dtypes=None): """ try to downcast each item to the dict of dtypes if present """ # turn it off completely if dtypes is False: return [self] values = self.values # single block handling if self._is_single_block: # try to cast all non-floats here if dtypes is None: dtypes = 'infer' nv = _possibly_downcast_to_dtype(values, dtypes) return [make_block(nv, ndim=self.ndim, fastpath=True, placement=self.mgr_locs)] # ndim > 1 if dtypes is None: return [self] if not (dtypes == 'infer' or isinstance(dtypes, dict)): raise ValueError("downcast must have a dictionary or 'infer' as " "its argument") # item-by-item # this is expensive as it splits the blocks items-by-item blocks = [] for i, rl in enumerate(self.mgr_locs): if dtypes == 'infer': dtype = 'infer' else: raise AssertionError("dtypes as dict is not supported yet") dtype = dtypes.get(item, self._downcast_dtype) if dtype is None: nv = _block_shape(values[i], ndim=self.ndim) else: nv = _possibly_downcast_to_dtype(values[i], dtype) nv = _block_shape(nv, ndim=self.ndim) blocks.append(make_block(nv, ndim=self.ndim, fastpath=True, placement=[rl])) return blocks
def test_downcast_conv(): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') assert (np.array_equal(result, arr)) arr = np.array([8., 8., 8., 8., 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) arr = np.array([8., 8., 8., 8., 9.0000000000005]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected))
def _try_cast_result(self, result, dtype=None): """ try to cast the result to our original type, we may have roundtripped thru object in the mean-time """ if dtype is None: dtype = self.dtype if self.is_integer or self.is_bool or self.is_datetime: pass elif self.is_float and result.dtype == self.dtype: # protect against a bool/object showing up here if isinstance(dtype, compat.string_types) and dtype == 'infer': return result if not isinstance(dtype, type): dtype = dtype.type if issubclass(dtype, (np.bool_, np.object_)): if issubclass(dtype, np.bool_): if isnull(result).all(): return result.astype(np.bool_) else: result = result.astype(np.object_) result[result == 1] = True result[result == 0] = False return result else: return result.astype(np.object_) return result # may need to change the dtype here return _possibly_downcast_to_dtype(result, dtype)
def test_downcast_conv(): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') assert (np.array_equal(result, arr)) arr = np.array([8., 8., 8., 8., 8.9999999999995]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) arr = np.array([8., 8., 8., 8., 9.0000000000005]) result = com._possibly_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected))
def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. Parameters ---------- arg : list, tuple, 1-d array, or Series errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaN - If 'ignore', then invalid parsing will return the input downcast : {'integer', 'signed', 'unsigned', 'float'} , default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. .. versionadded:: 0.19.0 Returns ------- ret : numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray Examples -------- Take separate series and convert to numeric, coercing when told to >>> import pandas as pd >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') is_series = False is_index = False is_scalar = False if isinstance(arg, pd.Series): is_series = True values = arg.values elif isinstance(arg, pd.Index): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif np.isscalar(arg): if com.is_number(arg): return arg is_scalar = True values = np.array([arg], dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a list, tuple, 1-d array, or Series') else: values = arg try: if com.is_numeric_dtype(values): pass elif com.is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = com._ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except Exception: if errors == 'raise': raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and com.is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] elif downcast == 'unsigned' and np.min(values) > 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize < values.dtype.itemsize: values = com._possibly_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return Index(values, name=arg.name) elif is_scalar: return values[0] else: return values
def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. Parameters ---------- arg : list, tuple, 1-d array, or Series errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaN - If 'ignore', then invalid parsing will return the input downcast : {'integer', 'signed', 'unsigned', 'float'} , default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. .. versionadded:: 0.19.0 Returns ------- ret : numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray Examples -------- Take separate series and convert to numeric, coercing when told to >>> import pandas as pd >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') is_series = False is_index = False is_scalar = False if isinstance(arg, pd.Series): is_series = True values = arg.values elif isinstance(arg, pd.Index): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif np.isscalar(arg): if com.is_number(arg): return arg is_scalar = True values = np.array([arg], dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a list, tuple, 1-d array, or Series') else: values = arg try: if com.is_numeric_dtype(values): pass elif com.is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = com._ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except Exception: if errors == 'raise': raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and com.is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] elif downcast == 'unsigned' and np.min(values) > 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize < values.dtype.itemsize: values = com._possibly_downcast_to_dtype( values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return Index(values, name=arg.name) elif is_scalar: return values[0] else: return values