Ejemplo n.º 1
0
    def test_combine_first_dt64(self):
        """Check ``Series.combine_first`` when the caller holds datetimes.

        Two cases are exercised: both operands converted with
        ``to_datetime``, and a datetime caller combined with a Series that
        still holds the raw (unparsed) values.
        """
        from pandas.core.tools.datetimes import to_datetime

        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
        # removed in NumPy 2.0, so it must not be used here.
        s0 = to_datetime(Series(["2010", np.nan]))
        s1 = to_datetime(Series([np.nan, "2011"]))
        rs = s0.combine_first(s1)
        xp = to_datetime(Series(['2010', '2011']))
        assert_series_equal(rs, xp)

        # Second operand is left unparsed, so its value is expected to come
        # through unchanged next to the caller's datetime.
        s0 = to_datetime(Series(["2010", np.nan]))
        s1 = Series([np.nan, "2011"])
        rs = s0.combine_first(s1)
        xp = Series([datetime(2010, 1, 1), '2011'])
        assert_series_equal(rs, xp)
Ejemplo n.º 2
0
    def _convert_1d(values, unit, axis):
        """Convert a scalar or one-dimensional collection via ``dates.date2num``.

        Date/time scalars are converted directly, integers and floats are
        returned untouched, strings are parsed on a best-effort basis, and
        list-like inputs are normalised to an ndarray before conversion.
        Anything else is returned unchanged. ``unit`` and ``axis`` are not
        used by this function.
        """
        def parse_or_keep(raw):
            # Best effort: hand back the input when it cannot be parsed.
            try:
                return dates.date2num(tools.to_datetime(raw))
            except Exception:
                return raw

        if isinstance(values, (datetime, pydt.date, np.datetime64, pydt.time)):
            return dates.date2num(values)
        if is_integer(values) or is_float(values):
            return values
        if isinstance(values, str):
            return parse_or_keep(values)
        if not isinstance(values, (list, tuple, np.ndarray, Index, Series)):
            return values

        array_like = values
        if isinstance(array_like, Series):
            # https://github.com/matplotlib/matplotlib/issues/11391
            # Series was skipped. Convert to DatetimeIndex to get asi8
            array_like = Index(array_like)
        if isinstance(array_like, Index):
            array_like = array_like.values
        if not isinstance(array_like, np.ndarray):
            array_like = com.asarray_tuplesafe(array_like)

        # Numeric arrays are passed through as they are.
        if is_integer_dtype(array_like) or is_float_dtype(array_like):
            return array_like

        try:
            array_like = tools.to_datetime(array_like)
        except Exception:
            # Keep the unparsed array and let date2num deal with it.
            pass

        return dates.date2num(array_like)
Ejemplo n.º 3
0
def time2num(d):
    """Map a time-like value to the number returned by ``_to_ordinalf``.

    Strings are parsed with ``tools.to_datetime`` and their time-of-day is
    converted; ``pydt.time`` objects are converted directly; any other value
    is returned unchanged.

    Raises
    ------
    ValueError
        If a string does not parse to a ``datetime`` instance.
    """
    if isinstance(d, str):
        stamp = tools.to_datetime(d)
        if isinstance(stamp, datetime):
            return _to_ordinalf(stamp.time())
        raise ValueError('Could not parse time {d}'.format(d=d))
    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
Ejemplo n.º 4
0
def time2num(d):
    """Map a time-like value to the number returned by ``_to_ordinalf``.

    String inputs (``compat.string_types``) are parsed with
    ``tools.to_datetime`` and their time-of-day is converted; ``pydt.time``
    objects are converted directly; any other value is returned unchanged.

    Raises
    ------
    ValueError
        If a string does not parse to a ``datetime`` instance.
    """
    if isinstance(d, compat.string_types):
        stamp = tools.to_datetime(d)
        if isinstance(stamp, datetime):
            return _to_ordinalf(stamp.time())
        raise ValueError('Could not parse time {d}'.format(d=d))
    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
Ejemplo n.º 5
0
def time2num(d):
    """Map a time-like value to the number returned by ``_to_ordinalf``.

    String inputs (``compat.string_types``) are parsed with
    ``tools.to_datetime`` and their time-of-day is converted; ``pydt.time``
    objects are converted directly; any other value is returned unchanged.

    Raises
    ------
    ValueError
        If a string does not parse to a ``datetime`` instance.
    """
    if isinstance(d, compat.string_types):
        stamp = tools.to_datetime(d)
        if isinstance(stamp, datetime):
            return _to_ordinalf(stamp.time())
        raise ValueError('Could not parse time %s' % d)
    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
Ejemplo n.º 6
0
def time2num(d):
    """Map a time-like value to the number returned by ``_to_ordinalf``.

    Strings are parsed with ``tools.to_datetime`` and their time-of-day is
    converted; ``pydt.time`` objects are converted directly; any other value
    is returned unchanged.

    Raises
    ------
    ValueError
        If a string does not parse to a ``datetime`` instance.
    """
    if isinstance(d, str):
        stamp = tools.to_datetime(d)
        if isinstance(stamp, datetime):
            return _to_ordinalf(stamp.time())
        raise ValueError(f"Could not parse time {d}")
    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
Ejemplo n.º 7
0
    def _from_sequence_of_strings(cls,
                                  strings,
                                  *,
                                  dtype: Dtype | None = None,
                                  copy=False):
        """
        Build a new ExtensionArray by parsing a sequence of strings.

        The pyarrow type derived from ``dtype`` selects which pandas parser
        converts ``strings``; for types without a dedicated parser the
        strings are handed to ``_from_sequence`` unchanged.
        """
        arrow_type = to_pyarrow_type(dtype)

        def build(parsed):
            # Single exit point shared by every branch below.
            return cls._from_sequence(parsed, dtype=arrow_type, copy=copy)

        if pa.types.is_timestamp(arrow_type):
            from pandas.core.tools.datetimes import to_datetime

            return build(to_datetime(strings, errors="raise"))

        if pa.types.is_date(arrow_type):
            from pandas.core.tools.datetimes import to_datetime

            return build(to_datetime(strings, errors="raise").date)

        if pa.types.is_duration(arrow_type):
            from pandas.core.tools.timedeltas import to_timedelta

            return build(to_timedelta(strings, errors="raise"))

        if pa.types.is_time(arrow_type):
            from pandas.core.tools.times import to_time

            # "coerce" to allow "null times" (None) to not raise
            return build(to_time(strings, errors="coerce"))

        if pa.types.is_boolean(arrow_type):
            from pandas.core.arrays import BooleanArray

            bool_array = BooleanArray._from_sequence_of_strings(strings)
            return build(bool_array.to_numpy())

        is_numeric = (pa.types.is_integer(arrow_type)
                      or pa.types.is_floating(arrow_type)
                      or pa.types.is_decimal(arrow_type))
        if is_numeric:
            from pandas.core.tools.numeric import to_numeric

            return build(to_numeric(strings, errors="raise"))

        # Let pyarrow try to infer or raise
        return build(strings)
Ejemplo n.º 8
0
    def converter(*date_cols):
        """Convert one or more date columns to datetimes.

        Uses ``date_parser`` from the enclosing scope when one was supplied;
        otherwise the columns are concatenated and parsed with
        ``tools.to_datetime``. Each path falls back to
        ``parsing.try_parse_dates`` when the first attempt fails.
        """
        if date_parser is not None:
            try:
                parsed = tools.to_datetime(date_parser(*date_cols),
                                           errors="ignore",
                                           cache=cache_dates)
                if isinstance(parsed, datetime.datetime):
                    # A scalar result is routed to the fallbacks below.
                    raise Exception("scalar parser")
                return parsed
            except Exception:
                try:
                    return tools.to_datetime(
                        parsing.try_parse_dates(
                            parsing.concat_date_cols(date_cols),
                            parser=date_parser,
                            dayfirst=dayfirst,
                        ),
                        errors="ignore",
                    )
                except Exception:
                    return generic_parser(date_parser, *date_cols)

        joined = parsing.concat_date_cols(date_cols)

        try:
            as_datetimes = tools.to_datetime(
                ensure_object(joined),
                utc=None,
                dayfirst=dayfirst,
                errors="ignore",
                infer_datetime_format=infer_datetime_format,
                cache=cache_dates,
            )
            return as_datetimes.to_numpy()
        except ValueError:
            fallback = parsing.try_parse_dates(joined, dayfirst=dayfirst)
            return tools.to_datetime(fallback, cache=cache_dates)
Ejemplo n.º 9
0
def clean_xml(filename, save=False):
    '''
    clean_xml()

    Build a DataFrame of step-count records from an XML file.

    The file is parsed with ``ET.parse``; only ``Record`` elements whose
    ``type`` is ``HKQuantityTypeIdentifierStepCount`` and whose
    ``creationDate`` is at or after 2020-07-01 00:00:00 -0700 are kept.

    Parameters
    ----------
    filename : anything accepted by ``ET.parse``
        Source XML document.
    save : bool, default False
        When true, the result is also written to ``cleaned_apple_steps.csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``recordType``, ``unit``, ``creationDate`` (datetime, date
        part only), ``startDate`` and ``endDate`` (UNIX seconds), ``value``,
        plus ``day of week (numeric)``, ``day of week (string)`` and
        ``month`` derived from ``creationDate``.
    '''
    stamp_format = '%Y-%m-%d %H:%M:%S %z'
    # Compare instants, not strings: a lexicographic comparison of stamps
    # gives wrong answers as soon as two records carry different UTC offsets.
    cutoff = dt.datetime.strptime("2020-07-01 00:00:00 -0700", stamp_format)

    def parse_stamp(text):
        return dt.datetime.strptime(text, stamp_format)

    root = ET.parse(filename).getroot()

    values = []
    credate = []
    start_seconds = []
    end_seconds = []
    units = []
    recordTypes = []

    # traverse xml for data
    for node in root.findall('.//Record[@type="HKQuantityTypeIdentifierStepCount"]'):
        created = parse_stamp(node.get('creationDate'))
        # only store nodes past a certain date
        if created < cutoff:
            continue
        values.append(int(node.get('value')))
        credate.append(created.date())
        # Convert to UNIX seconds straight from the aware datetimes; this
        # does not depend on the resolution pandas picks for the column and
        # also works when the records use more than one UTC offset.
        start_seconds.append(int(parse_stamp(node.get('startDate')).timestamp()))
        end_seconds.append(int(parse_stamp(node.get('endDate')).timestamp()))
        units.append(node.get('unit'))
        recordTypes.append(node.get('type'))

    cleaned_data_df = pd.DataFrame(
        {
            "recordType": recordTypes,
            "unit": units,
            "creationDate": credate,
            # UNIX timestamps for easier classification later
            "startDate": np.asarray(start_seconds, dtype=np.int64),
            "endDate": np.asarray(end_seconds, dtype=np.int64),
            "value": values,
        },
        columns=["recordType", "unit", "creationDate", "startDate", "endDate", "value"],
    )
    cleaned_data_df.creationDate = to_datetime(cleaned_data_df.creationDate)

    # add columns to distinguish datetimes by week day and month
    creation_index = pd.DatetimeIndex(cleaned_data_df['creationDate'])
    cleaned_data_df['day of week (numeric)'] = creation_index.weekday
    cleaned_data_df['day of week (string)'] = creation_index.strftime('%A')
    cleaned_data_df['month'] = creation_index.month

    # store data if necessary
    if save:
        cleaned_data_df.to_csv('cleaned_apple_steps.csv', index=False)

    return cleaned_data_df
Ejemplo n.º 10
0
    def test_parse_tz_aware(self):
        """Parse a ``Z``-suffixed timestamp used as the index column.

        See gh-1693. The index tz is expected to be ``pytz.utc``; when that
        identity check fails, the parsed stamp is compared field by field
        with an explicit UTC conversion instead.
        """
        import pytz

        csv_buffer = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

        # it works
        frame = self.read_csv(csv_buffer, index_col=0, parse_dates=True)
        first_stamp = frame.index[0]
        self.assertEqual(first_stamp.minute, 39)

        try:
            self.assertIs(frame.index.tz, pytz.utc)
        except AssertionError:
            # Fall back to comparing against a stamp converted with utc=True.
            py_datetimes = frame.index.to_pydatetime()
            utc_stamp = tools.to_datetime(py_datetimes, utc=True)[0]
            for field in ("minute", "hour", "day"):
                self.assertEqual(getattr(first_stamp, field),
                                 getattr(utc_stamp, field))
Ejemplo n.º 11
0
    def test_parse_tz_aware(self):
        """Parse a ``Z``-suffixed timestamp used as the index column.

        See gh-1693. The index tz is expected to be ``pytz.utc``; when that
        identity check fails, the parsed stamp is compared field by field
        with an explicit UTC conversion instead.
        """
        import pytz

        csv_buffer = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

        # it works
        frame = self.read_csv(csv_buffer, index_col=0, parse_dates=True)
        first_stamp = frame.index[0]
        assert first_stamp.minute == 39

        try:
            assert frame.index.tz is pytz.utc
        except AssertionError:
            # Fall back to comparing against a stamp converted with utc=True.
            py_datetimes = frame.index.to_pydatetime()
            utc_stamp = tools.to_datetime(py_datetimes, utc=True)[0]
            for field in ("minute", "hour", "day"):
                assert getattr(first_stamp, field) == getattr(utc_stamp, field)
Ejemplo n.º 12
0
    def test_parse_tz_aware(self):
        """Parse a ``Z``-suffixed timestamp used as the index column.

        See gh-1693. The index tz is expected to be ``pytz.utc``; when that
        identity check fails, the parsed stamp is compared field by field
        with an explicit UTC conversion instead.
        """
        import pytz

        csv_buffer = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

        # it works
        frame = self.read_csv(csv_buffer, index_col=0, parse_dates=True)
        first_stamp = frame.index[0]
        self.assertEqual(first_stamp.minute, 39)

        try:
            self.assertIs(frame.index.tz, pytz.utc)
        except AssertionError:
            # Fall back to comparing against a stamp converted with utc=True.
            py_datetimes = frame.index.to_pydatetime()
            utc_stamp = tools.to_datetime(py_datetimes, utc=True)[0]
            for field in ("minute", "hour", "day"):
                self.assertEqual(getattr(first_stamp, field),
                                 getattr(utc_stamp, field))
Ejemplo n.º 13
0
    def test_parse_tz_aware(self):
        """Parse a ``Z``-suffixed timestamp used as the index column.

        See gh-1693. The index tz is expected to be ``pytz.utc``; when that
        identity check fails, the parsed stamp is compared field by field
        with an explicit UTC conversion instead.
        """
        import pytz

        csv_buffer = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

        # it works
        frame = self.read_csv(csv_buffer, index_col=0, parse_dates=True)
        first_stamp = frame.index[0]
        assert first_stamp.minute == 39

        try:
            assert frame.index.tz is pytz.utc
        except AssertionError:
            # Fall back to comparing against a stamp converted with utc=True.
            py_datetimes = frame.index.to_pydatetime()
            utc_stamp = tools.to_datetime(py_datetimes, utc=True)[0]
            for field in ("minute", "hour", "day"):
                assert getattr(first_stamp, field) == getattr(utc_stamp, field)
Ejemplo n.º 14
0
    def _convert_1d(values, unit, axis):
        """Convert a scalar or one-dimensional collection to float ordinals.

        Date-like scalars go through ``_dt_to_float_ordinal`` (``pydt.time``
        through ``dates.date2num``), integers and floats are returned
        untouched, strings are parsed on a best-effort basis, and list-like
        inputs are normalised to an ndarray before conversion. Anything else
        is returned unchanged. ``unit`` and ``axis`` are not used here.
        """
        def parse_or_keep(raw):
            # Best effort: hand back the input when it cannot be parsed.
            try:
                return _dt_to_float_ordinal(tools.to_datetime(raw))
            except Exception:
                return raw

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        if isinstance(values, np.datetime64):
            return _dt_to_float_ordinal(tslibs.Timestamp(values))
        if isinstance(values, pydt.time):
            return dates.date2num(values)
        if is_integer(values) or is_float(values):
            return values
        if isinstance(values, compat.string_types):
            return parse_or_keep(values)
        if not isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)):
            return values

        if isinstance(values, ABCSeries):
            # https://github.com/matplotlib/matplotlib/issues/11391
            # Series was skipped. Convert to DatetimeIndex to get asi8
            values = Index(values)
        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com.asarray_tuplesafe(values)

        # Numeric arrays are passed through as they are.
        if is_integer_dtype(values) or is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                return _dt_to_float_ordinal(values)
            return [_dt_to_float_ordinal(item) for item in values]
        except Exception:
            # Whatever ``values`` holds at the point of failure is converted
            # in one go.
            return _dt_to_float_ordinal(values)
Ejemplo n.º 15
0
    def _convert_1d(values, unit, axis):
        """Convert a scalar or one-dimensional collection to float ordinals.

        Date-like scalars go through ``_dt_to_float_ordinal`` (``pydt.time``
        through ``dates.date2num``), integers and floats are returned
        untouched, strings are parsed on a best-effort basis, and list-like
        inputs are normalised to an ndarray before conversion. Anything else
        is returned unchanged. ``unit`` and ``axis`` are not used here.
        """
        def parse_or_keep(raw):
            # Best effort: hand back the input when it cannot be parsed.
            try:
                return _dt_to_float_ordinal(tools.to_datetime(raw))
            except Exception:
                return raw

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        if isinstance(values, np.datetime64):
            return _dt_to_float_ordinal(tslibs.Timestamp(values))
        if isinstance(values, pydt.time):
            return dates.date2num(values)
        if is_integer(values) or is_float(values):
            return values
        if isinstance(values, str):
            return parse_or_keep(values)
        if not isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)):
            return values

        if isinstance(values, ABCSeries):
            # https://github.com/matplotlib/matplotlib/issues/11391
            # Series was skipped. Convert to DatetimeIndex to get asi8
            values = Index(values)
        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com.asarray_tuplesafe(values)

        # Numeric arrays are passed through as they are.
        if is_integer_dtype(values) or is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                return _dt_to_float_ordinal(values)
            return [_dt_to_float_ordinal(item) for item in values]
        except Exception:
            # Whatever ``values`` holds at the point of failure is converted
            # in one go.
            return _dt_to_float_ordinal(values)
Ejemplo n.º 16
0
    def _convert_1d(values, unit, axis):
        """Convert a scalar or one-dimensional collection to float ordinals.

        Date-like scalars go through ``_dt_to_float_ordinal`` (``pydt.time``
        through ``dates.date2num``), integers and floats are returned
        untouched, strings are parsed on a best-effort basis, and list-like
        inputs are normalised to an ndarray before conversion. Anything else
        is returned unchanged. ``unit`` and ``axis`` are not used here.
        """
        def parse_or_keep(raw):
            # Best effort: hand back the input when it cannot be parsed.
            try:
                return _dt_to_float_ordinal(tools.to_datetime(raw))
            except Exception:
                return raw

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        if isinstance(values, np.datetime64):
            return _dt_to_float_ordinal(lib.Timestamp(values))
        if isinstance(values, pydt.time):
            return dates.date2num(values)
        if is_integer(values) or is_float(values):
            return values
        if isinstance(values, compat.string_types):
            return parse_or_keep(values)
        if not isinstance(values, (list, tuple, np.ndarray, Index)):
            return values

        if isinstance(values, Index):
            values = values.values
        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        # Numeric arrays are passed through as they are.
        if is_integer_dtype(values) or is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                return _dt_to_float_ordinal(values)
            return [_dt_to_float_ordinal(item) for item in values]
        except Exception:
            # Whatever ``values`` holds at the point of failure is converted
            # in one go.
            return _dt_to_float_ordinal(values)
Ejemplo n.º 17
0
    def truncate(
        self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True
    ) -> FrameOrSeries:
        """
        Truncate a Series or DataFrame before and after some index value.

        This is a useful shorthand for boolean indexing based on index
        values above or below certain thresholds.

        Parameters
        ----------
        before : date, str, int
            Truncate all rows before this index value.
        after : date, str, int
            Truncate all rows after this index value.
        axis : {0 or 'index', 1 or 'columns'}, optional
            Axis to truncate. Truncates the index (rows) by default.
        copy : bool, default is True,
            Return a copy of the truncated section.

        Returns
        -------
        type of caller
            The truncated Series or DataFrame.

        Raises
        ------
        ValueError
            If the axis being truncated is not sorted, or if `before` is
            greater than `after`.

        See Also
        --------
        DataFrame.loc : Select a subset of a DataFrame by label.
        DataFrame.iloc : Select a subset of a DataFrame by position.

        Notes
        -----
        If the index being truncated contains only datetime values,
        `before` and `after` may be specified as strings instead of
        Timestamps.

        Examples
        --------
        >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
        ...                    'B': ['f', 'g', 'h', 'i', 'j'],
        ...                    'C': ['k', 'l', 'm', 'n', 'o']},
        ...                   index=[1, 2, 3, 4, 5])
        >>> df
           A  B  C
        1  a  f  k
        2  b  g  l
        3  c  h  m
        4  d  i  n
        5  e  j  o

        >>> df.truncate(before=2, after=4)
           A  B  C
        2  b  g  l
        3  c  h  m
        4  d  i  n

        The columns of a DataFrame can be truncated.

        >>> df.truncate(before="A", after="B", axis="columns")
           A  B
        1  a  f
        2  b  g
        3  c  h
        4  d  i
        5  e  j
        """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)
        ax = self._get_axis(axis)

        # GH 17935
        # Check that index is sorted
        if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
            raise ValueError("truncate requires a sorted index")

        # if we have a date index, convert to dates, otherwise
        # treat like a slice
        if ax.is_all_dates:
            from pandas.core.tools.datetimes import to_datetime

            before = to_datetime(before)
            after = to_datetime(after)

        if before is not None and after is not None and before > after:
            raise ValueError(f"Truncate: {after} must be after {before}")

        # On a descending axis the bounds swap roles so that the label slice
        # below still runs in the axis's own direction.
        if ax.is_monotonic_decreasing:
            before, after = after, before

        slicer = [slice(None, None)] * self._AXIS_LEN
        slicer[axis] = slice(before, after)
        result = self.loc[tuple(slicer)]

        if isinstance(ax, MultiIndex):
            setattr(result, self._get_axis_name(axis), ax.truncate(before, after))

        if copy:
            result = result.copy()

        return result





        
Ejemplo n.º 18
0
 def try_parse(values):
     """Return ``values`` as a float ordinal, or unchanged if that fails."""
     try:
         as_datetime = tools.to_datetime(values)
         ordinal = _dt_to_float_ordinal(as_datetime)
     except Exception:
         # Best effort: any failure hands the input back untouched.
         return values
     return ordinal
Ejemplo n.º 19
0
def to_date(*args, **kwargs):
    """Forward all arguments to ``to_datetime`` and return ``.date()`` of the result."""
    parsed = to_datetime(*args, **kwargs)
    return parsed.date()
Ejemplo n.º 20
0
 def test_non_datetimeindex(self):
     """``infer_freq`` reports daily frequency for consecutive parsed dates."""
     parsed = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     inferred = frequencies.infer_freq(parsed)
     self.assertEqual(inferred, 'D')
Ejemplo n.º 21
0
def test_non_datetime_index():
    """``infer_freq`` reports daily frequency for consecutive parsed dates."""
    parsed = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
    inferred = frequencies.infer_freq(parsed)
    assert inferred == "D"
Ejemplo n.º 22
0
 def test_non_datetimeindex(self):
     """``infer_freq`` reports daily frequency for consecutive parsed dates."""
     parsed = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     inferred = frequencies.infer_freq(parsed)
     self.assertEqual(inferred, 'D')
Ejemplo n.º 23
0
 def try_parse(values):
     """Return ``dates.date2num`` of the parsed input, or the input if that fails."""
     try:
         as_datetime = tools.to_datetime(values)
         number = dates.date2num(as_datetime)
     except Exception:
         # Best effort: any failure hands the input back untouched.
         return values
     return number
Ejemplo n.º 24
0
def test_non_datetime_index():
    """``infer_freq`` reports daily frequency for consecutive parsed dates."""
    parsed = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
    inferred = frequencies.infer_freq(parsed)
    assert inferred == "D"
Ejemplo n.º 25
0
 def test_non_datetimeindex(self):
     dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     assert frequencies.infer_freq(dates) == 'D'
Ejemplo n.º 26
0
 def test_non_datetimeindex(self):
     dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     assert frequencies.infer_freq(dates) == 'D'
Ejemplo n.º 27
0
 def try_parse(values):
     """Return ``values`` as a float ordinal, or unchanged if that fails."""
     try:
         as_datetime = tools.to_datetime(values)
         ordinal = _dt_to_float_ordinal(as_datetime)
     except Exception:
         # Best effort: any failure hands the input back untouched.
         return values
     return ordinal