def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'): arg = np.array(list(arg), dtype='O') # these are shortcut-able if is_timedelta64_dtype(arg): value = arg.astype('timedelta64[ns]') elif is_integer_dtype(arg): value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype( 'timedelta64[ns]', copy=False) else: try: value = array_to_timedelta64(ensure_object(arg), unit=unit, errors=errors) value = value.astype('timedelta64[ns]', copy=False) except ValueError: if errors == 'ignore': return arg else: # This else-block accounts for the cases when errors='raise' # and errors='coerce'. If errors == 'raise', these errors # should be raised. If errors == 'coerce', we shouldn't # expect any errors to be raised, since all parsing errors # cause coercion to pd.NaT. However, if an error / bug is # introduced that causes an Exception to be raised, we would # like to surface it. raise if box: from pandas import TimedeltaIndex value = TimedeltaIndex(value, unit='ns', name=name) return value
def _convert_listlike(arg, format): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = ensure_object(arg) if infer_time_format and format is None: format = _guess_time_format_for_array(arg) times = [] if format is not None: for element in arg: try: times.append(datetime.strptime(element, format).time()) except (ValueError, TypeError): if errors == 'raise': msg = ("Cannot convert {element} to a time with given " "format {format}").format(element=element, format=format) raise ValueError(msg) elif errors == 'ignore': return arg else: times.append(None) else: formats = _time_formats[:] format_found = False for element in arg: time_object = None for time_format in formats: try: time_object = datetime.strptime(element, time_format).time() if not format_found: # Put the found format in front fmt = formats.pop(formats.index(time_format)) formats.insert(0, fmt) format_found = True break except (ValueError, TypeError): continue if time_object is not None: times.append(time_object) elif errors == 'raise': raise ValueError("Cannot convert arg {arg} to " "a time".format(arg=arg)) elif errors == 'ignore': return arg else: times.append(None) return times
def array_equivalent(left, right, strict_nan: bool = False) -> bool: """ True if two arrays, left and right, have equal non-NaN elements, and NaNs in corresponding locations. False otherwise. It is assumed that left and right are NumPy arrays of the same dtype. The behavior of this function (particularly with respect to NaNs) is not defined if the dtypes are different. Parameters ---------- left, right : ndarrays strict_nan : bool, default False If True, consider NaN and None to be different. Returns ------- b : bool Returns True if the arrays are equivalent. Examples -------- >>> array_equivalent( ... np.array([1, 2, np.nan]), ... np.array([1, 2, np.nan])) True >>> array_equivalent( ... np.array([1, np.nan, 2]), ... np.array([1, 2, np.nan])) False """ left, right = np.asarray(left), np.asarray(right) # shape compat if left.shape != right.shape: return False # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat if is_string_dtype(left) or is_string_dtype(right): if not strict_nan: # isna considers NaN and None to be equivalent. return lib.array_equivalent_object( ensure_object(left.ravel()), ensure_object(right.ravel()) ) for left_value, right_value in zip(left, right): if left_value is NaT and right_value is not NaT: return False elif left_value is libmissing.NA and right_value is not libmissing.NA: return False elif isinstance(left_value, float) and np.isnan(left_value): if not isinstance(right_value, float) or not np.isnan(right_value): return False else: try: if np.any(np.asarray(left_value != right_value)): return False except TypeError as err: if "Cannot compare tz-naive" in str(err): # tzawareness compat failure, see GH#28507 return False elif "boolean value of NA is ambiguous" in str(err): return False raise return True # NaNs can occur in float and complex arrays. if is_float_dtype(left) or is_complex_dtype(left): # empty if not (np.prod(left.shape) and np.prod(right.shape)): return True return ((left == right) | (isna(left) & isna(right))).all() elif is_datetimelike_v_numeric(left, right): # GH#29553 avoid numpy deprecation warning return False elif needs_i8_conversion(left) or needs_i8_conversion(right): # datetime64, timedelta64, Period if not is_dtype_equal(left.dtype, right.dtype): return False left = left.view("i8") right = right.view("i8") # if we have structured dtypes, compare first if left.dtype.type is np.void or right.dtype.type is np.void: if left.dtype != right.dtype: return False return np.array_equal(left, right)
def _convert_listlike_datetimes( arg, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None, ): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parsed name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- Index-like of parsed dates """ from pandas import DatetimeIndex from pandas.core.arrays.datetimes import ( maybe_convert_dtype, objects_to_datetime64ns, ) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") arg_dtype = getattr(arg, "dtype", None) # these are shortcutable if is_datetime64tz_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == "utc": # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has # no attribute "tz_convert" arg = arg.tz_convert(None).tz_localize(tz) # type: ignore return arg elif is_datetime64_ns_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass elif tz: # DatetimeArray, DatetimeIndex # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has # no attribute "tz_localize" return arg.tz_localize(tz) # type: ignore return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, "_values", arg) # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): result = arg.astype(f"datetime64[{unit}]") tz_parsed = None else: result, tz_parsed = tslib.array_with_unit_to_datetime( arg, unit, errors=errors) if errors == "ignore": from pandas import Index result = Index(result, name=name) else: result = DatetimeIndex(result, name=name) # GH 23758: We may still need to localize the result with tz # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) # result will be naive but in UTC try: result = result.tz_localize("UTC").tz_convert(tz_parsed) except AttributeError: # Regular Index from 'ignore' path return result if tz is not None: if result.tz is None: result = result.tz_localize(tz) else: result = result.tz_convert(tz) return result elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" ) # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg try: arg, _ = maybe_convert_dtype(arg, copy=False) except TypeError: if errors == "coerce": result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(result, name=name) elif errors == "ignore": from pandas import Index result = Index(arg, name=name) return result raise arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None tz_parsed = None result = None if format is not None: try: # shortcut formatting here if format == "%Y%m%d": try: # pass orig_arg as float-dtype may have been converted to # datetime64[ns] orig_arg = ensure_object(orig_arg) result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime) as err: raise ValueError( "cannot convert the input to '%Y%m%d' date format" ) from err # fallback if result is None: try: result, timezones = array_strptime(arg, format, exact=exact, errors=errors) if "%Z" in format or "%z" in format: return _return_parsed_timezone_results( result, timezones, tz, name) except tslibs.OutOfBoundsDatetime: if errors == "raise": raise elif errors == "coerce": result = np.empty(arg.shape, dtype="M8[ns]") iresult = result.view("i8") iresult.fill(tslibs.iNaT) else: result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == "raise": raise elif errors == "coerce": result = np.empty(arg.shape, dtype="M8[ns]") iresult = result.view("i8") iresult.fill(tslibs.iNaT) else: result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) dta = DatetimeArray(values, dtype=tz_to_dtype(tz)) return DatetimeIndex._simple_new(dta, name=name) except (ValueError, TypeError): raise e if result is None: assert format is None or infer_datetime_format utc = tz == "utc" result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True, ) if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array # is in UTC dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) return DatetimeIndex._simple_new(dta, name=name) utc = tz == "utc" return _box_as_indexlike(result, utc=utc, name=name)
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): assert kind in ['transform', 'aggregate'] # can we do this operation with our cython functions # if not raise NotImplementedError # we raise NotImplemented if this is an invalid operation # entirely, e.g. adding datetimes # categoricals are only 1d, so we # are not setup for dim transforming if is_categorical_dtype(values): raise NotImplementedError( "categoricals are not support in cython ops ATM") elif is_datetime64_any_dtype(values): if how in ['add', 'prod', 'cumsum', 'cumprod']: raise NotImplementedError( "datetime64 type does not support {} " "operations".format(how)) elif is_timedelta64_dtype(values): if how in ['prod', 'cumprod']: raise NotImplementedError( "timedelta64 type does not support {} " "operations".format(how)) arity = self._cython_arity.get(how, 1) vdim = values.ndim swapped = False if vdim == 1: values = values[:, None] out_shape = (self.ngroups, arity) else: if axis > 0: swapped = True values = values.swapaxes(0, axis) if arity > 1: raise NotImplementedError("arity of more than 1 is not " "supported for the 'how' argument") out_shape = (self.ngroups, ) + values.shape[1:] is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) if is_datetimelike: values = values.view('int64') is_numeric = True elif is_bool_dtype(values.dtype): values = ensure_float64(values) elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition if (values == iNaT).any(): values = ensure_float64(values) else: values = ensure_int64_or_float64(values) elif is_numeric and not is_complex_dtype(values): values = ensure_float64(values) else: values = values.astype(object) try: func = self._get_cython_function(kind, how, values, is_numeric) except NotImplementedError: if is_numeric: values = ensure_float64(values) func = self._get_cython_function(kind, how, values, is_numeric) else: raise if how == 'rank': out_dtype = 'float' else: if is_numeric: out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize) else: out_dtype = 'object' labels, _, _ = self.group_info if kind == 'aggregate': result = _maybe_fill(np.empty(out_shape, dtype=out_dtype), fill_value=np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) result = self._aggregate(result, counts, values, labels, func, is_numeric, is_datetimelike, min_count) elif kind == 'transform': result = _maybe_fill(np.empty_like(values, dtype=out_dtype), fill_value=np.nan) # TODO: min_count result = self._transform(result, values, labels, func, is_numeric, is_datetimelike, **kwargs) if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT if mask.any(): result = result.astype('float64') result[mask] = np.nan if (kind == 'aggregate' and self._filter_empty_groups and not counts.all()): if result.ndim == 2: try: result = lib.row_bool_subset(result, (counts > 0).view(np.uint8)) except ValueError: result = lib.row_bool_subset_object( ensure_object(result), (counts > 0).view(np.uint8)) else: result = result[counts > 0] if vdim == 1 and arity == 1: result = result[:, 0] if how in self._name_functions: # TODO names = self._name_functions[how]() else: names = None if swapped: result = result.swapaxes(0, axis) return result, names
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): valid_field_set = { 'year', 'month', 'day', 'quarter', 'hour', 'minute', 'second' } if not set(fields).issubset(valid_field_set): raise TypeError( '__new__() got an unexpected keyword argument {}'.format( list(set(fields) - valid_field_set)[0])) if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name if dtype is not None: dtype = pandas_dtype(dtype) if not is_period_dtype(dtype): raise ValueError('dtype must be PeriodDtype') if freq is None: freq = dtype.freq elif freq != dtype.freq: msg = 'specified freq and dtype are different' raise IncompatibleFrequency(msg) # coerce freq to freq object, otherwise it can be coerced elementwise # which is slow if freq: freq = Period._maybe_convert_freq(freq) if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, freq, fields) return cls._from_ordinals(data, name=name, freq=freq) if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq data = data._ndarray_values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) data = period.period_asfreq_arr(data._ndarray_values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) # not array / index if not isinstance( data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data) or isinstance(data, Period): cls._scalar_data_error(data) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) data = np.asarray(data) # datetime other than period if is_datetime64_dtype(data.dtype): data = dt64arr_to_periodarr(data, freq, tz) return cls._from_ordinals(data, name=name, freq=freq) # check not floats if infer_dtype(data) == 'floating' and len(data) > 0: raise TypeError("PeriodIndex does not allow " "floating point in construction") # anything else, likely an array of strings or periods data = ensure_object(data) freq = freq or period.extract_freq(data) data = period.extract_ordinals(data, freq) return cls._from_ordinals(data, name=name, freq=freq)
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): valid_field_set = {'year', 'month', 'day', 'quarter', 'hour', 'minute', 'second'} if not set(fields).issubset(valid_field_set): raise TypeError('__new__() got an unexpected keyword argument {}'. format(list(set(fields) - valid_field_set)[0])) periods = dtl.validate_periods(periods) if name is None and hasattr(data, 'name'): name = data.name freq = dtl.validate_dtype_freq(dtype, freq) # coerce freq to freq object, otherwise it can be coerced elementwise # which is slow if freq: freq = Period._maybe_convert_freq(freq) if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, freq, fields) return cls._simple_new(data, name=name, freq=freq) if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq data = data._ndarray_values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) data = period.period_asfreq_arr(data._ndarray_values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) # not array / index if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data): cls._scalar_data_error(data) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) data = np.asarray(data) # datetime other than period if is_datetime64_dtype(data.dtype): data = dt64arr_to_periodarr(data, freq, tz) return cls._simple_new(data, name=name, freq=freq) # check not floats if infer_dtype(data) == 'floating' and len(data) > 0: raise TypeError("PeriodIndex does not allow " "floating point in construction") # anything else, likely an array of strings or periods data = ensure_object(data) freq = freq or period.extract_freq(data) data = period.extract_ordinals(data, freq) return cls._simple_new(data, name=name, freq=freq)
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): assert kind in ['transform', 'aggregate'] # can we do this operation with our cython functions # if not raise NotImplementedError # we raise NotImplemented if this is an invalid operation # entirely, e.g. adding datetimes # categoricals are only 1d, so we # are not setup for dim transforming if is_categorical_dtype(values): raise NotImplementedError( "categoricals are not support in cython ops ATM") elif is_datetime64_any_dtype(values): if how in ['add', 'prod', 'cumsum', 'cumprod']: raise NotImplementedError( "datetime64 type does not support {} " "operations".format(how)) elif is_timedelta64_dtype(values): if how in ['prod', 'cumprod']: raise NotImplementedError( "timedelta64 type does not support {} " "operations".format(how)) arity = self._cython_arity.get(how, 1) vdim = values.ndim swapped = False if vdim == 1: values = values[:, None] out_shape = (self.ngroups, arity) else: if axis > 0: swapped = True values = values.swapaxes(0, axis) if arity > 1: raise NotImplementedError("arity of more than 1 is not " "supported for the 'how' argument") out_shape = (self.ngroups,) + values.shape[1:] is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) if is_datetimelike: values = values.view('int64') is_numeric = True elif is_bool_dtype(values.dtype): values = ensure_float64(values) elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition if (values == iNaT).any(): values = ensure_float64(values) else: values = ensure_int64_or_float64(values) elif is_numeric and not is_complex_dtype(values): values = ensure_float64(values) else: values = values.astype(object) try: func = self._get_cython_function( kind, how, values, is_numeric) except NotImplementedError: if is_numeric: values = ensure_float64(values) func = self._get_cython_function( kind, how, values, is_numeric) else: raise if how == 'rank': out_dtype = 'float' else: if is_numeric: out_dtype = '{kind}{itemsize}'.format( kind=values.dtype.kind, itemsize=values.dtype.itemsize) else: out_dtype = 'object' labels, _, _ = self.group_info if kind == 'aggregate': result = _maybe_fill(np.empty(out_shape, dtype=out_dtype), fill_value=np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) result = self._aggregate( result, counts, values, labels, func, is_numeric, is_datetimelike, min_count) elif kind == 'transform': result = _maybe_fill(np.empty_like(values, dtype=out_dtype), fill_value=np.nan) # TODO: min_count result = self._transform( result, values, labels, func, is_numeric, is_datetimelike, **kwargs) if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT if mask.any(): result = result.astype('float64') result[mask] = np.nan if (kind == 'aggregate' and self._filter_empty_groups and not counts.all()): if result.ndim == 2: try: result = lib.row_bool_subset( result, (counts > 0).view(np.uint8)) except ValueError: result = lib.row_bool_subset_object( ensure_object(result), (counts > 0).view(np.uint8)) else: result = result[counts > 0] if vdim == 1 and arity == 1: result = result[:, 0] if how in self._name_functions: # TODO names = self._name_functions[how]() else: names = None if swapped: result = result.swapaxes(0, axis) return result, names
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex from pandas.core.arrays.datetimes import ( maybe_convert_dtype, objects_to_datetime64ns) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result, name=name) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg arg, _ = maybe_convert_dtype(arg, copy=False) arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None tz_parsed = None result = None if format is not None: try: # shortcut formatting here if format == '%Y%m%d': try: # pass orig_arg as float-dtype may have been converted to # datetime64[ns] orig_arg = ensure_object(orig_arg) result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime( arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz, name) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e if result is None: assert format is None or infer_datetime_format utc = tz == 'utc' result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True) if tz_parsed is not None: if box: # We can take a shortcut since the datetime64 numpy array # is in UTC return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) else: # Convert the datetime64 numpy array to an numpy array # of datetime objects result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result] return np.array(result, dtype=object) if box: # Ensure we return an Index in all cases where box=True if is_datetime64_dtype(result): return DatetimeIndex(result, tz=tz, name=name) elif is_object_dtype(result): # e.g. an Index of datetime objects from pandas import Index return Index(result, name=name) return result
def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. The default return dtype is `float64` or `int64` depending on the data supplied. Use the `downcast` parameter to obtain other dtypes. Parameters ---------- arg : list, tuple, 1-d array, or Series errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaN - If 'ignore', then invalid parsing will return the input downcast : {'integer', 'signed', 'unsigned', 'float'} , default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. .. versionadded:: 0.19.0 Returns ------- ret : numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray Examples -------- Take separate series and convert to numeric, coercing when told to >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') is_series = False is_index = False is_scalars = False if isinstance(arg, ABCSeries): is_series = True values = arg.values elif isinstance(arg, ABCIndexClass): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif is_scalar(arg): if is_decimal(arg): return float(arg) if is_number(arg): return arg is_scalars = True values = np.array([arg], dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a list, tuple, 1-d array, or Series') else: values = arg try: if is_numeric_dtype(values): pass elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except Exception: if errors == 'raise': raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] elif downcast == 'unsigned' and np.min(values) >= 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return pd.Index(values, name=arg.name) elif is_scalars: return values[0] else: return values
def maybe_downcast_to_dtype(result, dtype): """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ do_round = False if is_scalar(result): return result elif isinstance(result, ABCDataFrame): # occurs in pivot_table doctest return result if isinstance(dtype, str): if dtype == "infer": inferred_type = lib.infer_dtype(ensure_object(result.ravel()), skipna=False) if inferred_type == "boolean": dtype = "bool" elif inferred_type == "integer": dtype = "int64" elif inferred_type == "datetime64": dtype = "datetime64[ns]" elif inferred_type == "timedelta64": dtype = "timedelta64[ns]" # try to upcast here elif inferred_type == "floating": dtype = "int64" if issubclass(result.dtype.type, np.number): do_round = True else: dtype = "object" dtype = np.dtype(dtype) converted = maybe_downcast_numeric(result, dtype, do_round) if converted is not result: return converted # a datetimelike # GH12821, iNaT is casted to float if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]: if hasattr(dtype, "tz"): # not a numpy dtype if dtype.tz: # convert to datetime and change timezone from pandas import to_datetime result = to_datetime(result).tz_localize("utc") result = result.tz_convert(dtype.tz) else: result = result.astype(dtype) elif dtype.type is Period: # TODO(DatetimeArray): merge with previous elif from pandas.core.arrays import PeriodArray try: return PeriodArray(result, freq=dtype.freq) except TypeError: # e.g. TypeError: int() argument must be a string, a # bytes-like object or a number, not 'Period pass return result
def maybe_infer_to_datetimelike(value, convert_dates: bool = False): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a datetime/timedelta set this is pretty strict in that a datetime/timedelta is REQUIRED in addition to possible nulls/string likes Parameters ---------- value : np.array / Series / Index / list-like convert_dates : bool, default False if True try really hard to convert dates (such as datetime.date), other leave inferred dtype 'date' alone """ # TODO: why not timedelta? if isinstance( value, (ABCDatetimeIndex, ABCPeriodIndex, ABCDatetimeArray, ABCPeriodArray)): return value elif isinstance(value, ABCSeries): if isinstance(value._values, ABCDatetimeIndex): return value._values v = value if not is_list_like(v): v = [v] v = np.array(v, copy=False) # we only care about object dtypes if not is_object_dtype(v): return value shape = v.shape if not v.ndim == 1: v = v.ravel() if not len(v): return value def try_datetime(v): # safe coerce to datetime64 try: # GH19671 v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0] except ValueError: # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype, xref GH19671 from pandas._libs.tslibs import conversion from pandas import DatetimeIndex try: values, tz = conversion.datetime_to_datetime64(v) return DatetimeIndex(values).tz_localize("UTC").tz_convert( tz=tz) except (ValueError, TypeError): pass except Exception: pass return v.reshape(shape) def try_timedelta(v): # safe coerce to timedelta64 # will try first with a string & object conversion from pandas import to_timedelta try: return to_timedelta(v)._ndarray_values.reshape(shape) except ValueError: return v.reshape(shape) inferred_type = lib.infer_datetimelike_array(ensure_object(v)) if inferred_type == "date" and convert_dates: value = try_datetime(v) elif inferred_type == "datetime": value = try_datetime(v) elif inferred_type == "timedelta": value = try_timedelta(v) elif inferred_type == "nat": # if all NaT, return as datetime if isna(v).all(): value = try_datetime(v) else: # We have at least a NaT and a string # try timedelta first to avoid spurious datetime conversions # e.g. '00:00:01' is a timedelta but technically is also a datetime value = try_timedelta(v) if lib.infer_dtype(value, skipna=False) in ["mixed"]: # cannot skip missing values, as NaT implies that the string # is actually a datetime value = try_datetime(v) return value
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime(arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result, tz_parsed = tslib.array_to_datetime( arg, errors=errors, utc=tz == 'utc', dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601) if tz_parsed is not None and box: return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) if box: if is_datetime64_dtype(result): return DatetimeIndex(result, tz=tz, name=name) elif is_object_dtype(result): from pandas import Index return Index(result, name=name) return result except ValueError as e: try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e
def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. The default return dtype is `float64` or `int64` depending on the data supplied. Use the `downcast` parameter to obtain other dtypes. Parameters ---------- arg : list, tuple, 1-d array, or Series errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaN - If 'ignore', then invalid parsing will return the input downcast : {'integer', 'signed', 'unsigned', 'float'} , default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. .. versionadded:: 0.19.0 Returns ------- ret : numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray Examples -------- Take separate series and convert to numeric, coercing when told to >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') is_series = False is_index = False is_scalars = False if isinstance(arg, ABCSeries): is_series = True values = arg.values elif isinstance(arg, ABCIndexClass): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif is_scalar(arg): if is_decimal(arg): return float(arg) if is_number(arg): return arg is_scalars = True values = np.array([arg], dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a list, tuple, 1-d array, or Series') else: values = arg try: if is_numeric_dtype(values): pass elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except Exception: if errors == 'raise': raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] elif downcast == 'unsigned' and np.min(values) >= 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return pd.Index(values, name=arg.name) elif is_scalars: return values[0] else: return values
def to_numeric(arg, errors="raise", downcast=None): """ Convert argument to a numeric type. The default return dtype is `float64` or `int64` depending on the data supplied. Use the `downcast` parameter to obtain other dtypes. Please note that precision loss may occur if really large numbers are passed in. Due to the internal limitations of `ndarray`, if numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are passed in, it is very likely they will be converted to float so that they can stored in an `ndarray`. These warnings apply similarly to `Series` since it internally leverages `ndarray`. Parameters ---------- arg : scalar, list, tuple, 1-d array, or Series Argument to be converted. errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception. - If 'coerce', then invalid parsing will be set as NaN. - If 'ignore', then invalid parsing will return the input. downcast : {'int', 'signed', 'unsigned', 'float'}, default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'int' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. Returns ------- ret Numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray. See Also -------- DataFrame.astype : Cast argument to a specified dtype. to_datetime : Convert argument to datetime. to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. DataFrame.convert_dtypes : Convert dtypes. Examples -------- Take separate series and convert to numeric, coercing when told to >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 """ if downcast not in (None, "integer", "signed", "unsigned", "float"): raise ValueError("invalid downcasting method provided") if errors not in ("ignore", "raise", "coerce"): raise ValueError("invalid error value specified") is_series = False is_index = False is_scalars = False if isinstance(arg, ABCSeries): is_series = True values = arg.values elif isinstance(arg, ABCIndexClass): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype="O") elif is_scalar(arg): if is_decimal(arg): return float(arg) if is_number(arg): return arg is_scalars = True values = np.array([arg], dtype="O") elif getattr(arg, "ndim", 1) > 1: raise TypeError("arg must be a list, tuple, 1-d array, or Series") else: values = arg if is_numeric_dtype(values): pass elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = ensure_object(values) coerce_numeric = errors not in ("ignore", "raise") try: values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except (ValueError, TypeError): if errors == "raise": raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return pd.Index(values, name=arg.name) elif is_scalars: return values[0] else: return values
def period_array( data: Sequence[Optional[Period]], freq: Optional[Union[str, Tick]] = None, copy: bool = False, ) -> PeriodArray: """ Construct a new PeriodArray from a sequence of Period scalars. Parameters ---------- data : Sequence of Period objects A sequence of Period objects. These are required to all have the same ``freq.`` Missing values can be indicated by ``None`` or ``pandas.NaT``. freq : str, Tick, or Offset The frequency of every element of the array. This can be specified to avoid inferring the `freq` from `data`. copy : bool, default False Whether to ensure a copy of the data is made. Returns ------- PeriodArray See Also -------- PeriodArray pandas.PeriodIndex Examples -------- >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A')]) <PeriodArray> ['2017', '2018'] Length: 2, dtype: period[A-DEC] >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A'), ... pd.NaT]) <PeriodArray> ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] Integers that look like years are handled >>> period_array([2000, 2001, 2002], freq='D') ['2000-01-01', '2001-01-01', '2002-01-01'] Length: 3, dtype: period[D] Datetime-like strings may also be passed >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') <PeriodArray> ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] Length: 4, dtype: period[Q-DEC] """ if is_datetime64_dtype(data): return PeriodArray._from_datetime64(data, freq) if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): return PeriodArray(data, freq) # other iterable of some kind if not isinstance(data, (np.ndarray, list, tuple)): data = list(data) data = np.asarray(data) dtype: Optional[PeriodDtype] if freq: dtype = PeriodDtype(freq) else: dtype = None if is_float_dtype(data) and len(data) > 0: raise TypeError( "PeriodIndex does not allow floating point in construction") data = ensure_object(data) return PeriodArray._from_sequence(data, dtype=dtype)
def period_array( data: Sequence[Period | str | None] | AnyArrayLike, freq: str | Tick | None = None, copy: bool = False, ) -> PeriodArray: """ Construct a new PeriodArray from a sequence of Period scalars. Parameters ---------- data : Sequence of Period objects A sequence of Period objects. These are required to all have the same ``freq.`` Missing values can be indicated by ``None`` or ``pandas.NaT``. freq : str, Tick, or Offset The frequency of every element of the array. This can be specified to avoid inferring the `freq` from `data`. copy : bool, default False Whether to ensure a copy of the data is made. Returns ------- PeriodArray See Also -------- PeriodArray pandas.PeriodIndex Examples -------- >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A')]) <PeriodArray> ['2017', '2018'] Length: 2, dtype: period[A-DEC] >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A'), ... pd.NaT]) <PeriodArray> ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] Integers that look like years are handled >>> period_array([2000, 2001, 2002], freq='D') <PeriodArray> ['2000-01-01', '2001-01-01', '2002-01-01'] Length: 3, dtype: period[D] Datetime-like strings may also be passed >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') <PeriodArray> ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] Length: 4, dtype: period[Q-DEC] """ data_dtype = getattr(data, "dtype", None) if is_datetime64_dtype(data_dtype): return PeriodArray._from_datetime64(data, freq) if is_period_dtype(data_dtype): return PeriodArray(data, freq=freq) # other iterable of some kind if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)): data = list(data) arrdata = np.asarray(data) dtype: PeriodDtype | None if freq: dtype = PeriodDtype(freq) else: dtype = None if is_float_dtype(arrdata) and len(arrdata) > 0: raise TypeError( "PeriodIndex does not allow floating point in construction") if is_integer_dtype(arrdata.dtype): arr = arrdata.astype(np.int64, copy=False) # error: Argument 2 to "from_ordinals" has incompatible type "Union[str, # Tick, None]"; expected "Union[timedelta, BaseOffset, str]" ordinals = libperiod.from_ordinals(arr, freq) # type: ignore[arg-type] return PeriodArray(ordinals, dtype=dtype) data = ensure_object(arrdata) return PeriodArray._from_sequence(data, dtype=dtype)
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray from pandas.core.arrays.datetimes import ( maybe_convert_dtype, objects_to_datetime64ns) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index result = Index(result, name=name) # GH 23758: We may still need to localize the result with tz try: return result.tz_localize(tz) except AttributeError: return result return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg arg, _ = maybe_convert_dtype(arg, copy=False) arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None tz_parsed = None result = None if format is not None: try: # shortcut formatting here if format == '%Y%m%d': try: # pass orig_arg as float-dtype may have been converted to # datetime64[ns] orig_arg = ensure_object(orig_arg) result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime( arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz, name) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e if result is None: assert format is None or infer_datetime_format utc = tz == 'utc' result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True) if tz_parsed is not None: if box: # We can take a shortcut since the datetime64 numpy array # is in UTC return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) else: # Convert the datetime64 numpy array to an numpy array # of datetime objects result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result] return np.array(result, dtype=object) if box: # Ensure we return an Index in all cases where box=True if is_datetime64_dtype(result): return DatetimeIndex(result, tz=tz, name=name) elif is_object_dtype(result): # e.g. an Index of datetime objects from pandas import Index return Index(result, name=name) return result
def _convert_listlike_datetimes( arg, format: str | None, name: Hashable = None, tz: Timezone | None = None, unit: str | None = None, errors: str = "raise", infer_datetime_format: bool = False, dayfirst: bool | None = None, yearfirst: bool | None = None, exact: bool = True, ): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parsed name : object None or string for the Index name tz : object None or 'utc' unit : str None or string of the frequency of the passed data errors : str error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : bool, default False inferring format behavior from to_datetime dayfirst : bool dayfirst parsing behavior from to_datetime yearfirst : bool yearfirst parsing behavior from to_datetime exact : bool, default True exact format matching behavior from to_datetime Returns ------- Index-like of parsed dates """ if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") arg_dtype = getattr(arg, "dtype", None) # these are shortcutable if is_datetime64tz_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == "utc": arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass elif tz: # DatetimeArray, DatetimeIndex return arg.tz_localize(tz) return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") return _to_datetime_with_unit(arg, unit, name, tz, errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" ) # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg try: arg, _ = maybe_convert_dtype(arg, copy=False) except TypeError: if errors == "coerce": npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(npvalues, name=name) elif errors == "ignore": idx = Index(arg, name=name) return idx raise arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None if format is not None: res = _to_datetime_with_format(arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format) if res is not None: return res assert format is None or infer_datetime_format utc = tz == "utc" result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True, ) if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array # is in UTC dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) return DatetimeIndex._simple_new(dta, name=name) utc = tz == "utc" return _box_as_indexlike(result, utc=utc, name=name)
def period_array(data, freq=None, copy=False): # type: (Sequence[Optional[Period]], Optional[Tick]) -> PeriodArray """ Construct a new PeriodArray from a sequence of Period scalars. Parameters ---------- data : Sequence of Period objects A sequence of Period objects. These are required to all have the same ``freq.`` Missing values can be indicated by ``None`` or ``pandas.NaT``. freq : str, Tick, or Offset The frequency of every element of the array. This can be specified to avoid inferring the `freq` from `data`. copy : bool, default False Whether to ensure a copy of the data is made. Returns ------- PeriodArray See Also -------- PeriodArray pandas.PeriodIndex Examples -------- >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A')]) <PeriodArray> ['2017', '2018'] Length: 2, dtype: period[A-DEC] >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A'), ... pd.NaT]) <PeriodArray> ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] Integers that look like years are handled >>> period_array([2000, 2001, 2002], freq='D') ['2000-01-01', '2001-01-01', '2002-01-01'] Length: 3, dtype: period[D] Datetime-like strings may also be passed >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') <PeriodArray> ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] Length: 4, dtype: period[Q-DEC] """ if is_datetime64_dtype(data): return PeriodArray._from_datetime64(data, freq) if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): return PeriodArray(data, freq) # other iterable of some kind if not isinstance(data, (np.ndarray, list, tuple)): data = list(data) data = np.asarray(data) if freq: dtype = PeriodDtype(freq) else: dtype = None if is_float_dtype(data) and len(data) > 0: raise TypeError("PeriodIndex does not allow " "floating point in construction") data = ensure_object(data) return PeriodArray._from_sequence(data, dtype=dtype)
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime( arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime( arg, errors=errors, utc=tz == 'utc', dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 ) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) return result except ValueError as e: try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e