def test_combine_first_dt64(self):
    """combine_first on a datetime64 Series fills NaT slots from the
    other operand; the result is datetime64 when both operands are
    datetime64 and object dtype when the filler Series is not.

    Fix: ``np.NaN`` was an alias removed in NumPy 2.0 — use ``np.nan``.
    """
    from pandas.core.tools.datetimes import to_datetime

    # Both operands datetime64: result stays datetime64.
    s0 = to_datetime(Series(["2010", np.nan]))
    s1 = to_datetime(Series([np.nan, "2011"]))
    rs = s0.combine_first(s1)
    xp = to_datetime(Series(["2010", "2011"]))
    assert_series_equal(rs, xp)

    # Mixed datetime64 / object operands: filler string is kept as-is.
    s0 = to_datetime(Series(["2010", np.nan]))
    s1 = Series([np.nan, "2011"])
    rs = s0.combine_first(s1)
    xp = Series([datetime(2010, 1, 1), "2011"])
    assert_series_equal(rs, xp)
def _convert_1d(values, unit, axis):
    """Convert `values` into matplotlib float date ordinals.

    Accepts a scalar datetime-like, a number, a string, or a sequence
    (list/tuple/ndarray/Index/Series).  Numeric input passes through
    unchanged; strings and datetime-likes are parsed and converted via
    ``dates.date2num``.  `unit` and `axis` are part of the matplotlib
    converter signature and are not used here.
    """

    def try_parse(values):
        # Best effort: parse the string then convert; on any failure
        # hand the raw input back so matplotlib can try its own parsing.
        try:
            return dates.date2num(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date, np.datetime64, pydt.time)):
        return dates.date2num(values)
    elif is_integer(values) or is_float(values):
        # Already ordinal-like; return untouched.
        return values
    elif isinstance(values, str):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray, Index, Series)):
        if isinstance(values, Series):
            # https://github.com/matplotlib/matplotlib/issues/11391
            # Series was skipped. Convert to DatetimeIndex to get asi8
            values = Index(values)
        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com.asarray_tuplesafe(values)

        if is_integer_dtype(values) or is_float_dtype(values):
            # Numeric arrays are assumed to already be ordinals.
            return values

        # Try to coerce to datetimes first; if that fails, fall through
        # and let date2num attempt conversion of the raw values.
        try:
            values = tools.to_datetime(values)
        except Exception:
            pass

        values = dates.date2num(values)

    return values
def time2num(d):
    """Convert a time-like value to a float ordinal.

    Strings are parsed with ``tools.to_datetime`` (raising ``ValueError``
    when parsing does not produce a datetime), ``datetime.time`` objects
    are converted directly, and any other value is returned untouched.
    """
    if isinstance(d, pydt.time):
        return _to_ordinalf(d)
    if not isinstance(d, str):
        return d
    parsed = tools.to_datetime(d)
    if isinstance(parsed, datetime):
        return _to_ordinalf(parsed.time())
    raise ValueError('Could not parse time {d}'.format(d=d))
def time2num(d):
    """Return a float ordinal for a time-like value.

    Strings are parsed via ``tools.to_datetime``; values that are neither
    strings nor ``datetime.time`` instances pass through unchanged.
    """
    if isinstance(d, compat.string_types):
        parsed = tools.to_datetime(d)
        if isinstance(parsed, datetime):
            return _to_ordinalf(parsed.time())
        raise ValueError('Could not parse time {d}'.format(d=d))
    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
def time2num(d):
    """Convert ``d`` to a float ordinal when it is a parseable string or a
    ``datetime.time``; anything else is returned as-is."""
    if isinstance(d, pydt.time):
        return _to_ordinalf(d)
    if isinstance(d, compat.string_types):
        parsed = tools.to_datetime(d)
        if not isinstance(parsed, datetime):
            raise ValueError('Could not parse time %s' % d)
        return _to_ordinalf(parsed.time())
    return d
def time2num(d):
    """Map a string or ``datetime.time`` to its float ordinal; leave any
    other input untouched."""
    if not isinstance(d, str):
        return _to_ordinalf(d) if isinstance(d, pydt.time) else d
    parsed = tools.to_datetime(d)
    if not isinstance(parsed, datetime):
        raise ValueError(f"Could not parse time {d}")
    return _to_ordinalf(parsed.time())
def _from_sequence_of_strings(cls, strings, *, dtype: Dtype | None = None, copy=False):
    """
    Construct a new ExtensionArray from a sequence of strings.

    The target pyarrow type (derived from ``dtype``) selects which pandas
    parser converts the strings before handing them to ``_from_sequence``.

    Parameters
    ----------
    strings : sequence of str
        Values to parse according to ``dtype``.
    dtype : Dtype, optional
        Target dtype; mapped to a pyarrow type to choose the parser.
    copy : bool, default False
        Passed through to ``_from_sequence``.
    """
    pa_type = to_pyarrow_type(dtype)
    if pa.types.is_timestamp(pa_type):
        # Strict parsing: malformed timestamps raise.
        from pandas.core.tools.datetimes import to_datetime
        scalars = to_datetime(strings, errors="raise")
    elif pa.types.is_date(pa_type):
        # Parse as datetimes, then keep only the date component.
        from pandas.core.tools.datetimes import to_datetime
        scalars = to_datetime(strings, errors="raise").date
    elif pa.types.is_duration(pa_type):
        from pandas.core.tools.timedeltas import to_timedelta
        scalars = to_timedelta(strings, errors="raise")
    elif pa.types.is_time(pa_type):
        from pandas.core.tools.times import to_time
        # "coerce" to allow "null times" (None) to not raise
        scalars = to_time(strings, errors="coerce")
    elif pa.types.is_boolean(pa_type):
        # Reuse the masked BooleanArray string parser, then convert to
        # a numpy array for pyarrow ingestion.
        from pandas.core.arrays import BooleanArray
        scalars = BooleanArray._from_sequence_of_strings(
            strings).to_numpy()
    elif (pa.types.is_integer(pa_type)
          or pa.types.is_floating(pa_type)
          or pa.types.is_decimal(pa_type)):
        from pandas.core.tools.numeric import to_numeric
        scalars = to_numeric(strings, errors="raise")
    else:
        # Let pyarrow try to infer or raise
        scalars = strings
    return cls._from_sequence(scalars, dtype=pa_type, copy=copy)
def converter(*date_cols):
    """Convert one or more raw date columns into datetime values.

    Closure variables (``date_parser``, ``dayfirst``, ``cache_dates``,
    ``infer_datetime_format``) come from the enclosing scope — presumably
    the read_csv/parser options; TODO confirm against the caller.
    """
    if date_parser is None:
        # No user parser: concatenate the columns into strings and let
        # to_datetime handle them.
        strs = parsing.concat_date_cols(date_cols)
        try:
            # NOTE(review): errors="ignore" returns the input unchanged
            # on failure instead of raising — intentional fallback chain.
            return tools.to_datetime(
                ensure_object(strs),
                utc=None,
                dayfirst=dayfirst,
                errors="ignore",
                infer_datetime_format=infer_datetime_format,
                cache=cache_dates,
            ).to_numpy()
        except ValueError:
            # Fall back to the slower per-element parser.
            return tools.to_datetime(parsing.try_parse_dates(
                strs, dayfirst=dayfirst), cache=cache_dates)
    else:
        try:
            # User-supplied parser applied to the columns directly.
            result = tools.to_datetime(date_parser(*date_cols),
                                       errors="ignore", cache=cache_dates)
            if isinstance(result, datetime.datetime):
                # A scalar result means the parser did not vectorize;
                # force the element-wise fallback below.
                raise Exception("scalar parser")
            return result
        except Exception:
            try:
                # Apply the user parser element-wise over concatenated
                # string columns.
                return tools.to_datetime(
                    parsing.try_parse_dates(
                        parsing.concat_date_cols(date_cols),
                        parser=date_parser,
                        dayfirst=dayfirst,
                    ),
                    errors="ignore",
                )
            except Exception:
                # Last resort: generic row-by-row parsing.
                return generic_parser(date_parser, *date_cols)
def clean_xml(filename, save=False):
    """Parse an Apple Health export XML file and extract step-count records.

    Only ``HKQuantityTypeIdentifierStepCount`` records created on or after
    "2020-07-01 00:00:00 -0700" are kept.  Start/end timestamps are
    converted to UNIX seconds, and weekday/month columns are derived from
    the creation date.

    Fixes: removed the unreachable ``pass`` after ``return``; corrected the
    docstring (it named a different function and had typos); hoisted the
    repeated ``pd.DatetimeIndex`` construction.

    Parameters
    ----------
    filename : str or path
        Path to the Apple Health ``export.xml`` file.
    save : bool, default False
        When True, also write the result to ``cleaned_apple_steps.csv``.

    Returns
    -------
    pandas.DataFrame
        Cleaned step-count records.
    """
    tree = ET.parse(filename)
    root = tree.getroot()

    values = []
    credate = []
    startDates = []
    endDates = []
    units = []
    recordTypes = []

    # Traverse the XML for step-count records.
    for node in root.findall('.//Record[@type="HKQuantityTypeIdentifierStepCount"]'):
        # Only keep nodes past the cutoff date.  Plain string comparison is
        # valid here because the layout "YYYY-MM-DD HH:MM:SS -0700" is fixed.
        if node.get('creationDate') >= "2020-07-01 00:00:00 -0700":
            values.append(int(node.get('value')))
            credate.append(dt.datetime.strptime(
                node.get('creationDate'), '%Y-%m-%d %H:%M:%S %z').date())
            startDates.append(dt.datetime.strptime(
                node.get('startDate'), '%Y-%m-%d %H:%M:%S %z'))
            endDates.append(dt.datetime.strptime(
                node.get('endDate'), '%Y-%m-%d %H:%M:%S %z'))
            units.append(node.get('unit'))
            recordTypes.append(node.get('type'))

    cleaned_data_df = pd.DataFrame(
        {"recordType": recordTypes,
         "unit": units,
         "creationDate": credate,
         "startDate": startDates,
         "endDate": endDates,
         "value": values},
        columns=["recordType", "unit", "creationDate",
                 "startDate", "endDate", "value"])

    cleaned_data_df.creationDate = to_datetime(cleaned_data_df.creationDate)

    # Format timestamps as UNIX timestamps for easier classification later.
    cleaned_data_df['startDate'] = (
        cleaned_data_df['startDate'].values.astype(np.int64) // 10 ** 9)
    cleaned_data_df['endDate'] = (
        cleaned_data_df['endDate'].values.astype(np.int64) // 10 ** 9)

    # Add columns to distinguish datetimes by week day and month.
    creation_idx = pd.DatetimeIndex(cleaned_data_df['creationDate'])
    cleaned_data_df['day of week (numeric)'] = creation_idx.weekday
    cleaned_data_df['day of week (string)'] = creation_idx.strftime('%A')
    cleaned_data_df['month'] = creation_idx.month

    # Store data if requested.
    if save:
        cleaned_data_df.to_csv('cleaned_apple_steps.csv', index=False)

    return cleaned_data_df
def test_parse_tz_aware(self):
    """Index parsed from ISO-8601 "Z" strings should be UTC (gh-1693)."""
    import pytz

    csv_buf = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
    result = self.read_csv(csv_buf, index_col=0, parse_dates=True)
    stamp = result.index[0]
    self.assertEqual(stamp.minute, 39)
    try:
        self.assertIs(result.index.tz, pytz.utc)
    except AssertionError:
        # hello Yaroslav
        arr = result.index.to_pydatetime()
        result = tools.to_datetime(arr, utc=True)[0]
        for attr in ("minute", "hour", "day"):
            self.assertEqual(getattr(stamp, attr), getattr(result, attr))
def test_parse_tz_aware(self):
    """read_csv should produce a UTC-aware index for "Z" stamps (gh-1693)."""
    import pytz

    buf = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
    result = self.read_csv(buf, index_col=0, parse_dates=True)
    stamp = result.index[0]
    assert stamp.minute == 39
    try:
        assert result.index.tz is pytz.utc
    except AssertionError:
        # hello Yaroslav
        arr = result.index.to_pydatetime()
        result = tools.to_datetime(arr, utc=True)[0]
        for field in ("minute", "hour", "day"):
            assert getattr(stamp, field) == getattr(result, field)
def test_parse_tz_aware(self):
    """Parsing a timezone-aware CSV index keeps UTC (gh-1693)."""
    import pytz

    source = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
    frame = self.read_csv(source, index_col=0, parse_dates=True)
    stamp = frame.index[0]
    self.assertEqual(stamp.minute, 39)
    try:
        self.assertIs(frame.index.tz, pytz.utc)
    except AssertionError:
        # hello Yaroslav
        arr = frame.index.to_pydatetime()
        converted = tools.to_datetime(arr, utc=True)[0]
        self.assertEqual(stamp.minute, converted.minute)
        self.assertEqual(stamp.hour, converted.hour)
        self.assertEqual(stamp.day, converted.day)
def test_parse_tz_aware(self):
    """A "Z"-suffixed timestamp column parses to a UTC index (gh-1693)."""
    import pytz

    raw = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
    parsed = self.read_csv(raw, index_col=0, parse_dates=True)
    stamp = parsed.index[0]
    assert stamp.minute == 39
    try:
        assert parsed.index.tz is pytz.utc
    except AssertionError:
        # hello Yaroslav
        arr = parsed.index.to_pydatetime()
        result = tools.to_datetime(arr, utc=True)[0]
        assert stamp.minute == result.minute
        assert stamp.hour == result.hour
        assert stamp.day == result.day
def _convert_1d(values, unit, axis):
    """Convert datetime-like `values` into matplotlib float date ordinals.

    Handles scalars (datetimes, dates, datetime64, time, numbers, strings)
    and sequences (list/tuple/ndarray/Index/Series).  Numeric input passes
    through unchanged.  `unit` and `axis` are part of the matplotlib
    converter signature and are unused here.
    """

    def try_parse(values):
        # Best effort: parse strings then convert to a float ordinal;
        # return the raw input on any failure.
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, np.datetime64):
        # Wrap in Timestamp so _dt_to_float_ordinal gets a datetime-like.
        return _dt_to_float_ordinal(tslibs.Timestamp(values))
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (is_integer(values) or is_float(values)):
        return values
    elif isinstance(values, compat.string_types):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)):
        if isinstance(values, ABCSeries):
            # https://github.com/matplotlib/matplotlib/issues/11391
            # Series was skipped. Convert to DatetimeIndex to get asi8
            values = Index(values)
        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com.asarray_tuplesafe(values)

        if is_integer_dtype(values) or is_float_dtype(values):
            # Already numeric ordinals; pass through.
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = _dt_to_float_ordinal(values)
            else:
                # Element-wise conversion for non-Index results.
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # Fall back to converting the raw values in one call.
            values = _dt_to_float_ordinal(values)

    return values
def _convert_1d(values, unit, axis):
    """Convert datetime-like `values` into matplotlib float date ordinals.

    Same dispatch as the matplotlib unit-converter protocol expects:
    scalar datetime-likes, numbers, strings, or sequences.  `unit` and
    `axis` are unused here.
    """

    def try_parse(values):
        # Best effort string parsing; on failure return the input
        # unchanged for matplotlib to handle.
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, np.datetime64):
        # Wrap in Timestamp so the ordinal helper gets a datetime-like.
        return _dt_to_float_ordinal(tslibs.Timestamp(values))
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (is_integer(values) or is_float(values)):
        # Already ordinal-like numbers.
        return values
    elif isinstance(values, str):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)):
        if isinstance(values, ABCSeries):
            # https://github.com/matplotlib/matplotlib/issues/11391
            # Series was skipped. Convert to DatetimeIndex to get asi8
            values = Index(values)
        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com.asarray_tuplesafe(values)

        if is_integer_dtype(values) or is_float_dtype(values):
            # Numeric arrays pass through untouched.
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = _dt_to_float_ordinal(values)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            values = _dt_to_float_ordinal(values)

    return values
def _convert_1d(values, unit, axis):
    """Convert datetime-like `values` into matplotlib float date ordinals.

    Older variant: no Series handling; sequences are list/tuple/ndarray/
    Index only.  `unit` and `axis` are unused here.
    """

    def try_parse(values):
        # Best-effort parse of a string; on failure return it unchanged.
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, np.datetime64):
        # Wrap in Timestamp so the ordinal helper gets a datetime-like.
        return _dt_to_float_ordinal(lib.Timestamp(values))
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (is_integer(values) or is_float(values)):
        # Already ordinal-like numbers; pass through.
        return values
    elif isinstance(values, compat.string_types):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray, Index)):
        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        if is_integer_dtype(values) or is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = _dt_to_float_ordinal(values)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # Fall back to converting the raw values in one call.
            values = _dt_to_float_ordinal(values)

    return values
def truncate(
    self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True
) -> FrameOrSeries:
    """
    Truncate a Series or DataFrame before and after some index value.

    This is a useful shorthand for boolean indexing based on index
    values above or below certain thresholds.

    Parameters
    ----------
    before : date, str, int
        Truncate all rows before this index value.
    after : date, str, int
        Truncate all rows after this index value.
    axis : {0 or 'index', 1 or 'columns'}, optional
        Axis to truncate. Truncates the index (rows) by default.
    copy : bool, default is True,
        Return a copy of the truncated section.

    Returns
    -------
    type of caller
        The truncated Series or DataFrame.

    See Also
    --------
    DataFrame.loc : Select a subset of a DataFrame by label.
    DataFrame.iloc : Select a subset of a DataFrame by position.

    Notes
    -----
    If the index being truncated contains only datetime values,
    `before` and `after` may be specified as strings instead of
    Timestamps.

    Examples
    --------
    >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
    ...                    'B': ['f', 'g', 'h', 'i', 'j'],
    ...                    'C': ['k', 'l', 'm', 'n', 'o']},
    ...                   index=[1, 2, 3, 4, 5])
    >>> df
       A  B  C
    1  a  f  k
    2  b  g  l
    3  c  h  m
    4  d  i  n
    5  e  j  o

    >>> df.truncate(before=2, after=4)
       A  B  C
    2  b  g  l
    3  c  h  m
    4  d  i  n

    The columns of a DataFrame can be truncated.
    """
    # Fix: the docstring above was previously a dead string literal placed
    # AFTER the function body; it is now in docstring position.
    if axis is None:
        axis = self._stat_axis_number
    axis = self._get_axis_number(axis)
    ax = self._get_axis(axis)

    # GH 17935
    # Check that index is sorted
    if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
        raise ValueError("truncate requires a sorted index")

    # if we have a date index, convert to dates, otherwise
    # treat like a slice
    if ax.is_all_dates:
        from pandas.core.tools.datetimes import to_datetime

        before = to_datetime(before)
        after = to_datetime(after)

    if before is not None and after is not None:
        if before > after:
            raise ValueError(f"Truncate: {after} must be after {before}")

    # For a descending index, slice bounds must be swapped.
    if ax.is_monotonic_decreasing:
        before, after = after, before

    slicer = [slice(None, None)] * self._AXIS_LEN
    slicer[axis] = slice(before, after)
    result = self.loc[tuple(slicer)]

    if isinstance(ax, MultiIndex):
        # Also truncate the MultiIndex itself so unused levels are dropped.
        setattr(result, self._get_axis_name(axis), ax.truncate(before, after))

    if copy:
        result = result.copy()

    return result
def try_parse(values):
    """Parse `values` to float date ordinals; hand the input back
    unchanged when conversion fails for any reason."""
    try:
        parsed = tools.to_datetime(values)
        return _dt_to_float_ordinal(parsed)
    except Exception:
        return values
def to_date(*args, **kwargs):
    """Parse the given arguments with ``to_datetime`` and return only the
    ``datetime.date`` component of the result."""
    parsed = to_datetime(*args, **kwargs)
    return parsed.date()
def test_non_datetimeindex(self):
    """infer_freq works on an index produced by to_datetime."""
    parsed = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
    self.assertEqual(frequencies.infer_freq(parsed), 'D')
def test_non_datetime_index():
    """infer_freq accepts the DatetimeIndex produced by to_datetime."""
    idx = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
    inferred = frequencies.infer_freq(idx)
    assert inferred == "D"
def test_non_datetimeindex(self):
    """Frequency inference succeeds on a daily to_datetime index."""
    daily = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
    inferred = frequencies.infer_freq(daily)
    self.assertEqual(inferred, 'D')
def try_parse(values):
    """Best-effort conversion of `values` to matplotlib date numbers; on
    any parsing failure the original input is handed back."""
    try:
        result = dates.date2num(tools.to_datetime(values))
    except Exception:
        result = values
    return result
def test_non_datetime_index():
    """Daily dates built with to_datetime infer a 'D' frequency."""
    dates_idx = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
    assert frequencies.infer_freq(dates_idx) == "D"
def test_non_datetimeindex(self): dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000']) assert frequencies.infer_freq(dates) == 'D'
def test_non_datetimeindex(self): dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000']) assert frequencies.infer_freq(dates) == 'D'
def try_parse(values):
    """Convert `values` to float date ordinals, returning the input
    unchanged when parsing or conversion raises."""
    try:
        converted = tools.to_datetime(values)
        ordinal = _dt_to_float_ordinal(converted)
    except Exception:
        return values
    return ordinal