Esempio n. 1
0
File: ops.py Progetto: pydata/pandas
    def convert_values(self):
        """Rewrite the scalar operand of a datetime comparison in place.

        When one operand is a datetime term and the other is a scalar term,
        the scalar's value is turned into a ``Timestamp`` (converted to UTC
        if it carries a timezone) and stored back through ``update``.
        """
        def as_utc_timestamp(raw):
            # Numbers are rendered to text first, using the encoded printer
            # only when an encoding has been configured.
            if isinstance(raw, (int, float)):
                if self.encoding is None:
                    raw = pprint_thing(raw)
                else:
                    raw = pprint_thing_encoded(raw, encoding=self.encoding)
            stamp = Timestamp(_ensure_decoded(raw))
            return stamp if stamp.tz is None else stamp.tz_convert('UTC')

        lhs, rhs = self.lhs, self.rhs

        # Datetime on the left, scalar on the right.
        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
            self.rhs.update(as_utc_timestamp(rhs.value))

        # Datetime on the right, scalar on the left; evaluated after the
        # update above, exactly in this order.
        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
            self.lhs.update(as_utc_timestamp(lhs.value))
Esempio n. 2
0
    def convert_values(self) -> None:
        """
        Rewrite the scalar operand of a datetime comparison in place.

        When one operand is a datetime term and the other is a scalar term,
        the scalar's value is turned into a ``Timestamp`` (converted to UTC
        if it carries a timezone) and stored back through ``update``.
        """
        def to_utc_stamp(raw):
            # Numeric scalars go through the pretty-printer first; the
            # encoded variant is used only when an encoding is configured.
            if isinstance(raw, (int, float)):
                if self.encoding is None:
                    raw = pprint_thing(raw)
                else:
                    raw = pprint_thing_encoded(raw, encoding=self.encoding)
            stamp = Timestamp(ensure_decoded(raw))
            if stamp.tz is None:
                return stamp
            return stamp.tz_convert("UTC")

        lhs, rhs = self.lhs, self.rhs

        # Datetime term on the left compared against a scalar on the right.
        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
            self.rhs.update(to_utc_stamp(rhs.value))

        # Mirror case; checked second, after the right side may have changed.
        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
            self.lhs.update(to_utc_stamp(lhs.value))
Esempio n. 3
0
def describe_categorical_1d(data: "Series", is_series: bool) -> "Series":
    """Summarise a Series by treating its values as categories.

    Parameters
    ----------
    data : Series
        Series to be described.
    is_series : bool
        True if the original object is a Series.
        False if one column of a DataFrame is being described.

    Returns
    -------
    Series
        Indexed by ``count``, ``unique``, ``top`` and ``freq``; datetime
        input additionally gets ``first`` and ``last``.
    """
    value_freqs = data.value_counts()
    n_unique = len(value_freqs[value_freqs != 0])
    labels = ["count", "unique"]
    stats = [data.count(), n_unique]

    from pandas import Series

    if n_unique == 0:
        # Nothing to rank: fill 'top' and 'freq' with NaN so the output
        # keeps the same shape, and force object dtype.
        labels += ["top", "freq"]
        stats += [np.nan, np.nan]
        return Series(stats, index=labels, name=data.name, dtype="object")

    top = value_freqs.index[0]
    freq = value_freqs.iloc[0]

    if not is_datetime64_any_dtype(data.dtype):
        labels += ["top", "freq"]
        stats += [top, freq]
        return Series(stats, index=labels, name=data.name, dtype=None)

    # The warning is raised from this frame (not a helper) because the
    # stacklevel values below are counted from here.
    warnings.warn(
        "Treating datetime data as categorical rather than numeric in "
        "`.describe` is deprecated and will be removed in a future "
        "version of pandas. Specify `datetime_is_numeric=True` to "
        "silence this warning and adopt the future behavior now.",
        FutureWarning,
        stacklevel=6 if is_series else 7,
    )

    tz = data.dt.tz
    as_i8 = data.dropna().values.view("i8")
    top = Timestamp(top)
    if top.tzinfo is None or tz is None:
        top = top.tz_localize(tz)
    else:
        # Already tz-aware and the data has a zone: convert, don't localize.
        top = top.tz_convert(tz)

    labels += ["top", "freq", "first", "last"]
    earliest = Timestamp(as_i8.min(), tz=tz)
    latest = Timestamp(as_i8.max(), tz=tz)
    stats += [top, freq, earliest, latest]
    return Series(stats, index=labels, name=data.name, dtype=None)
Esempio n. 4
0
    def convert_value(self, v) -> "TermValue":
        """Convert the value in the term to something accepted by PyTables.

        The conversion is chosen from ``self.kind`` (and ``self.meta`` for
        categoricals), checked in this order: datetime, timedelta, category,
        integer, float, bool, and finally plain strings.

        Parameters
        ----------
        v : object
            The value from the expression to convert.

        Returns
        -------
        TermValue
            Built from the converted value and the resolved kind.

        Raises
        ------
        TypeError
            If no kind-specific branch applies and ``v`` is not a string.
        """
        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded, encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == "datetime64" or kind == "datetime":
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind == "timedelta64" or kind == "timedelta":
            # A string already spells out its own units; only bare numbers
            # are interpreted as seconds.
            if isinstance(v, str):
                v = Timedelta(v).value
            else:
                v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            metadata = com.values_from_object(self.metadata)
            # searchsorted returns an insertion point even for absent values,
            # so membership has to be tested explicitly; otherwise a missing
            # value would be mapped to the position of a real category.
            if v not in metadata:
                result = -1
            else:
                result = metadata.searchsorted(v, side="left")
            return TermValue(result, result, "integer")
        elif kind == "integer":
            # Go through float so that inputs such as "1.0" are accepted.
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                # Any string outside this list counts as True.
                v = v.strip().lower() not in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(
                f"Cannot compare {v} of type {type(v)} to {kind} column")
Esempio n. 5
0
    def convert_value(self, v):
        """Convert the value in the term to something accepted by PyTables.

        The conversion is chosen from ``self.kind`` (and ``self.meta`` for
        categoricals), checked in this order: datetime, timedelta, category,
        integer, float, bool, and finally plain strings.

        Parameters
        ----------
        v : object
            The value from the expression to convert.

        Returns
        -------
        TermValue
            Built from the converted value and the resolved kind.

        Raises
        ------
        TypeError
            If no kind-specific branch applies and ``v`` is not a string.
        """

        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded,
                                  encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == 'datetime64' or kind == 'datetime':
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert('UTC')
            return TermValue(v, v.value, kind)
        elif kind == 'timedelta64' or kind == 'timedelta':
            # A string already spells out its own units; only bare numbers
            # are interpreted as seconds.
            if isinstance(v, str):
                v = Timedelta(v).value
            else:
                v = Timedelta(v, unit='s').value
            return TermValue(int(v), v, kind)
        elif meta == 'category':
            metadata = com.values_from_object(self.metadata)
            # searchsorted returns an insertion point even for absent values,
            # so membership has to be tested explicitly; otherwise a missing
            # value would be mapped to the position of a real category.
            if v not in metadata:
                result = -1
            else:
                result = metadata.searchsorted(v, side='left')
            return TermValue(result, result, 'integer')
        elif kind == 'integer':
            # Go through float so that inputs such as "1.0" are accepted.
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == 'float':
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == 'bool':
            if isinstance(v, str):
                # Any string outside this list counts as True.
                v = v.strip().lower() not in ['false', 'f', 'no',
                                              'n', 'none', '0',
                                              '[]', '{}', '']
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), 'string')
        else:
            raise TypeError("Cannot compare {v} of type {typ} to {kind} column"
                            .format(v=v, typ=type(v), kind=kind))
Esempio n. 6
0
    def convert_value(self, v):
        """Convert the value in the term to something accepted by PyTables.

        Branches are tried in a fixed order driven by ``self.kind`` and
        ``self.meta``: datetime, timedelta, category, integer, float, bool,
        then plain strings as a fallback.

        Parameters
        ----------
        v : object
            The value from the expression to convert.

        Returns
        -------
        TermValue
            Built from the converted value and the resolved kind.

        Raises
        ------
        TypeError
            If no kind-specific branch applies and ``v`` is not a string.
        """

        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded,
                                  encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == 'datetime64' or kind == 'datetime':
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert('UTC')
            return TermValue(v, v.value, kind)
        elif kind == 'timedelta64' or kind == 'timedelta':
            # Strings carry their own units, so ``unit`` is only supplied
            # for non-string input, which is read as seconds.
            if isinstance(v, str):
                v = Timedelta(v).value
            else:
                v = Timedelta(v, unit='s').value
            return TermValue(int(v), v, kind)
        elif meta == 'category':
            metadata = com.values_from_object(self.metadata)
            # Check membership before asking for a position: searchsorted
            # always returns an insertion index, which for an absent value
            # would otherwise be mistaken for an existing category.
            if v not in metadata:
                result = -1
            else:
                result = metadata.searchsorted(v, side='left')
            return TermValue(result, result, 'integer')
        elif kind == 'integer':
            # Go through float so that inputs such as "1.0" are accepted.
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == 'float':
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == 'bool':
            if isinstance(v, str):
                # Any string outside this list counts as True.
                v = v.strip().lower() not in ['false', 'f', 'no',
                                              'n', 'none', '0',
                                              '[]', '{}', '']
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), 'string')
        else:
            raise TypeError("Cannot compare {v} of type {typ} to {kind} column"
                            .format(v=v, typ=type(v), kind=kind))
Esempio n. 7
0
def describe_timestamp_as_categorical_1d(
    data: Series,
    percentiles_ignored: Sequence[float],
) -> Series:
    """Summarise a timestamp Series as if its values were categories.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Not used; accepted so the signature matches the other describers.

    Returns
    -------
    Series
        Indexed by ``count``, ``unique``, ``top``, ``freq``, ``first`` and
        ``last``; when there are no unique values only the first four
        entries are present, with ``top`` and ``freq`` set to NaN.
    """
    from pandas import Series

    value_freqs = data.value_counts()
    n_unique = len(value_freqs[value_freqs != 0])
    labels = ["count", "unique"]
    stats = [data.count(), n_unique]

    if not n_unique > 0:
        # No values to rank: fill 'top' and 'freq' with NaN so the output
        # keeps a consistent shape, and force object dtype.
        labels += ["top", "freq"]
        stats += [np.nan, np.nan]
        return Series(stats, index=labels, name=data.name, dtype="object")

    most_common = value_freqs.index[0]
    occurrences = value_freqs.iloc[0]
    tz = data.dt.tz
    as_i8 = data.dropna().values.view("i8")

    top = Timestamp(most_common)
    if top.tzinfo is None or tz is None:
        top = top.tz_localize(tz)
    else:
        # Already tz-aware and the data has a zone: convert, don't localize.
        top = top.tz_convert(tz)

    earliest = Timestamp(as_i8.min(), tz=tz)
    latest = Timestamp(as_i8.max(), tz=tz)
    labels += ["top", "freq", "first", "last"]
    stats += [top, occurrences, earliest, latest]
    return Series(stats, index=labels, name=data.name, dtype=None)
Esempio n. 8
0
    def test_timestamp(self):
        # GH#17329
        # A tz-naive Timestamp should report the same timestamp() as a copy
        # of itself tagged with UTC.
        naive = Timestamp.now()
        naive_as_utc = naive.replace(tzinfo=utc)
        assert naive.timestamp() == naive_as_utc.timestamp()

        central = Timestamp("2014-10-11 11:00:01.12345678", tz="US/Central")
        central_in_utc = central.tz_convert("UTC")

        # Same instant expressed in two zones
        assert central.timestamp() == central_in_utc.timestamp()

        # datetime.timestamp() converts in the local timezone, so the
        # comparison is made with the local zone set to UTC
        with tm.set_timezone("UTC"):
            # should agree with datetime.timestamp method
            as_pydatetime = naive.to_pydatetime()
            assert as_pydatetime.timestamp() == naive.timestamp()
Esempio n. 9
0
    def convert_value(self, v) -> TermValue:
        """
        Translate the value in the term into something PyTables accepts.

        The branch is picked from ``self.kind`` (and ``self.meta`` for
        categoricals) in a fixed order: datetime, timedelta, category,
        integer, float, bool, then plain strings. Anything else raises
        ``TypeError``.
        """
        def stringify(value):
            if self.encoding is None:
                return pprint_thing(value)
            return pprint_thing_encoded(value, encoding=self.encoding)

        kind = ensure_decoded(self.kind)
        meta = ensure_decoded(self.meta)

        if kind in ("datetime64", "datetime"):
            raw = stringify(v) if isinstance(v, (int, float)) else v
            stamp = Timestamp(ensure_decoded(raw))
            if stamp.tz is not None:
                stamp = stamp.tz_convert("UTC")
            return TermValue(stamp, stamp.value, kind)

        if kind in ("timedelta64", "timedelta"):
            # Strings are parsed as given; other input is read as seconds.
            delta = Timedelta(v) if isinstance(v, str) else Timedelta(v, unit="s")
            raw_value = delta.value
            return TermValue(int(raw_value), raw_value, kind)

        if meta == "category":
            metadata = extract_array(self.metadata, extract_numpy=True)
            if v not in metadata:
                # Absent values are reported as -1 rather than a position.
                result = -1
            else:
                # error: Incompatible types in assignment (expression has type
                # "Union[Any, ndarray]", variable has type "int")
                result = metadata.searchsorted(  # type: ignore[assignment]
                    v, side="left")
            return TermValue(result, result, "integer")

        if kind == "integer":
            as_int = int(float(v))
            return TermValue(as_int, as_int, kind)

        if kind == "float":
            as_float = float(v)
            return TermValue(as_float, as_float, kind)

        if kind == "bool":
            if isinstance(v, str):
                # Any string outside this list counts as True.
                false_spellings = ["false", "f", "no", "n", "none", "0", "[]", "{}", ""]
                flag = v.strip().lower() not in false_spellings
            else:
                flag = bool(v)
            return TermValue(flag, flag, kind)

        if isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")

        raise TypeError(
            f"Cannot compare {v} of type {type(v)} to {kind} column")