Example #1
0
def _factorize_keys(lk, rk, sort=True):
    """
    Encode the left and right keys as integer labels using one shared
    factorizer.

    Parameters
    ----------
    lk, rk : array-like
        Left and right key values.
    sort : boolean, default True
        If True, the labels are passed through ``_sort_labels`` together
        with the factorizer's uniques.

    Returns
    -------
    (llab, rlab, count) : tuple
        Labels for the left and right keys and the number of groups.
        Labels equal to -1 (the "NA group") are replaced by one extra
        trailing group id, and ``count`` is bumped accordingly.
    """
    if com.is_integer_dtype(lk) and com.is_integer_dtype(rk):
        factorizer_cls = lib.Int64Factorizer
        lk = com._ensure_int64(lk)
        rk = com._ensure_int64(rk)
    else:
        factorizer_cls = lib.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    # the same factorizer instance is fed both sides, left first
    rizer = factorizer_cls(max(len(lk), len(rk)))
    llab, _ = rizer.factorize(lk)
    rlab, _ = rizer.factorize(rk)
    count = rizer.get_count()

    if sort:
        llab, rlab = _sort_labels(rizer.uniques, llab, rlab)

    # NA group
    left_na = llab == -1
    has_left_na = left_na.any()
    right_na = rlab == -1
    has_right_na = right_na.any()

    if has_left_na:
        np.putmask(llab, left_na, count)
    if has_right_na:
        np.putmask(rlab, right_na, count)
    if has_left_na or has_right_na:
        # the NA labels now occupy one additional group
        count += 1

    return llab, rlab, count
Example #2
0
    def wrapper(self, other):
        """
        Apply ``na_op`` between ``self.values`` and ``other`` and wrap the
        result with ``self._constructor``.

        Missing values in the result are filled with 0 when both operands
        are integer dtype, otherwise with False followed by a cast to bool.
        Returns ``NotImplemented`` when ``other`` is a DataFrame.
        """
        self_is_int = com.is_integer_dtype(self.dtype)

        def fill_int(obj):
            return obj.fillna(0)

        def fill_bool(obj):
            return obj.fillna(False).astype(bool)

        if isinstance(other, pd.Series):
            name = _maybe_match_name(self, other)
            other = other.reindex_like(self)
            other_is_int = com.is_integer_dtype(other.dtype)
            if other_is_int:
                other = fill_int(other)
            else:
                other = fill_bool(other)

            both_int = self_is_int and other_is_int
            combined = self._constructor(na_op(self.values, other.values),
                                         index=self.index,
                                         name=name)
            if both_int:
                return fill_int(combined)
            return fill_bool(combined)

        if isinstance(other, pd.DataFrame):
            return NotImplemented

        # scalars, list, tuple, np.array
        both_int = self_is_int and com.is_integer_dtype(np.asarray(other))
        combined = self._constructor(na_op(self.values, other),
                                     index=self.index)
        filled = fill_int(combined) if both_int else fill_bool(combined)
        return filled.__finalize__(self)
Example #3
0
    def wrapper(self, other):
        """
        Combine ``self.values`` with ``other`` through ``na_op`` and return
        the result built by ``self._constructor``.

        NaNs in the result become 0 if both sides are integer dtype;
        otherwise they become False and the result is cast to bool.
        A DataFrame operand yields ``NotImplemented``.
        """
        lhs_is_int = is_integer_dtype(self.dtype)

        def _fill_zero(obj):
            return obj.fillna(0)

        def _fill_false(obj):
            return obj.fillna(False).astype(bool)

        if isinstance(other, pd.Series):
            name = _maybe_match_name(self, other)
            other = other.reindex_like(self)
            rhs_is_int = is_integer_dtype(other.dtype)
            other = _fill_zero(other) if rhs_is_int else _fill_false(other)

            if lhs_is_int and rhs_is_int:
                finish = _fill_zero
            else:
                finish = _fill_false
            raw = self._constructor(na_op(self.values, other.values),
                                    index=self.index,
                                    name=name)
            return finish(raw)

        if isinstance(other, pd.DataFrame):
            return NotImplemented

        # scalars, list, tuple, np.array
        if lhs_is_int and is_integer_dtype(np.asarray(other)):
            finish = _fill_zero
        else:
            finish = _fill_false
        raw = self._constructor(na_op(self.values, other), index=self.index)
        return finish(raw).__finalize__(self)
Example #4
0
 def test_delevel_infer_dtype(self):
     """reset_index() on a MultiIndex yields integer and float columns
     for the integer and float levels."""
     # list(...) instead of a comprehension that shadowed the builtin `tuple`
     tuples = list(cart_product(["foo", "bar"], [10, 20], [1.0, 1.1]))
     index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
     df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
     deleveled = df.reset_index()
     # assertTrue replaces the deprecated `assert_` alias
     self.assertTrue(com.is_integer_dtype(deleveled["prm1"]))
     self.assertTrue(com.is_float_dtype(deleveled["prm2"]))
Example #5
0
    def _prep_values(self, values=None, kill_inf=True, how=None):
        """
        Coerce the data to operate on into float64.

        Parameters
        ----------
        values : array-like, optional
            Defaults to ``self._selected_obj.values``, or to
            ``self._selected_obj`` itself when it has no ``values``
            attribute.
        kill_inf : boolean, default True
            If True, a copy is made and infinite entries are set to NaN.
        how : optional
            Not used by this method.

        Returns
        -------
        values : the float64-coerced data

        Raises
        ------
        NotImplementedError
            If ``com.needs_i8_conversion`` is true for the dtype.
        TypeError
            If the float64 conversion fails with ValueError or TypeError.
        """

        if values is None:
            values = getattr(self._selected_obj, 'values', self._selected_obj)

        # GH #12373 : rolling functions error on float32 data
        # make sure the data is coerced to float64
        if (com.is_float_dtype(values.dtype) or
                com.is_integer_dtype(values.dtype)):
            values = com._ensure_float64(values)
        elif com.needs_i8_conversion(values.dtype):
            raise NotImplementedError("ops for {action} for this "
                                      "dtype {dtype} are not "
                                      "implemented".format(
                                          action=self._window_type,
                                          dtype=values.dtype))
        else:
            try:
                values = com._ensure_float64(values)
            except (ValueError, TypeError):
                raise TypeError("cannot handle this type -> {0}"
                                "".format(values.dtype))

        if kill_inf:
            # copy first so the caller's data is not modified
            values = values.copy()
            # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
            values[np.isinf(values)] = np.nan

        return values
Example #6
0
def _get_data_algo(values, func_map):
    """
    Select the entry of ``func_map`` matching the dtype of ``values`` and
    coerce ``values`` to the representation that entry expects.

    Parameters
    ----------
    values : array-like
    func_map : mapping
        Must provide the keys 'float64', 'int64' and 'generic'.

    Returns
    -------
    (f, values) : tuple of the selected entry and the coerced values
    """
    if com.is_float_dtype(values):
        return func_map['float64'], com._ensure_float64(values)

    if com.needs_i8_conversion(values):
        null_mask = com.isnull(values)
        if null_mask.ravel().any():
            # if we have NaT, punt to object dtype
            func = func_map['generic']
            as_object = com._ensure_object(values)
            as_object[null_mask] = np.nan
            return func, as_object
        return func_map['int64'], values.view('i8')

    if com.is_integer_dtype(values):
        return func_map['int64'], com._ensure_int64(values)

    return func_map['generic'], com._ensure_object(values)
Example #7
0
def _get_data_algo(values, func_map):
    """
    Return ``(f, values)``: the ``func_map`` entry for the dtype family of
    ``values`` plus ``values`` converted for that entry.

    Floats use 'float64'; integers use 'int64'; dtypes flagged by
    ``com.needs_i8_conversion`` use 'int64' on an 'i8' view unless nulls
    are present, in which case they fall back to 'generic' on an object
    array; everything else uses 'generic'.
    """
    if com.is_float_dtype(values):
        algo = func_map['float64']
        data = com._ensure_float64(values)
    elif com.needs_i8_conversion(values):
        has_null = com.isnull(values)
        if not has_null.ravel().any():
            algo = func_map['int64']
            data = values.view('i8')
        else:
            # if we have NaT, punt to object dtype
            algo = func_map['generic']
            data = com._ensure_object(values)
            data[has_null] = np.nan
    elif com.is_integer_dtype(values):
        algo = func_map['int64']
        data = com._ensure_int64(values)
    else:
        algo = func_map['generic']
        data = com._ensure_object(values)
    return algo, data
Example #8
0
 def _maybe_convert_timedelta(self, other):
     """
     Convert ``other`` into a count of ``self.freq`` units.

     Parameters
     ----------
     other : timedelta, np.timedelta64, offsets.Tick, Timedelta,
         offsets.DateOffset or np.ndarray

     Returns
     -------
     converted : the integer multiple(s) of the frequency; an integer
         dtype ndarray is returned unchanged

     Raises
     ------
     ValueError
         If none of the conversions applies, e.g. the input is not an
         exact multiple of the frequency.
     """
     if isinstance(other,
                   (timedelta, np.timedelta64, offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = frequencies.get_standard_freq(other)
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
     elif isinstance(other, np.ndarray):
         if com.is_integer_dtype(other):
             return other
         elif com.is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 # every element must be an exact multiple; the previous
                 # `(...).all() == 0` passed when only some of them were
                 if (nanos % offset_nanos == 0).all():
                     return nanos // offset_nanos
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise ValueError(msg.format(self.freqstr))
Example #9
0
def value_counts(values, sort=True, ascending=False):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order

    Returns
    -------
    value_counts : Series
    """
    from pandas.core.series import Series
    from collections import defaultdict

    if com.is_integer_dtype(values.dtype):
        # integer fast path
        keys, counts = lib.value_count_int64(com._ensure_int64(values))
        result = Series(counts, index=keys)
    else:
        tally = defaultdict(int)
        # nulls are dropped before counting
        for item in values[com.notnull(values)]:
            tally[item] += 1
        result = Series(tally)

    if not sort:
        return result

    result.sort()
    if ascending:
        return result
    return result[::-1]
Example #10
0
File: sql.py Project: yazici/pandas
    def _sqlalchemy_type(self, arr_or_dtype):
        """
        Map an array, dtype or the ``date`` class to a SQLAlchemy column type.

        Returns ``Date`` for ``date``; ``DateTime`` for datetime64 input
        (``DateTime(timezone=True)`` when the input exposes ``tzinfo``);
        ``BigInteger`` for timedelta64 (with a UserWarning) and for
        integers; ``Float`` for floats; ``Boolean`` for bools; and ``Text``
        for anything else.
        """
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Interval)

        if arr_or_dtype is date:
            return Date
        if com.is_datetime64_dtype(arr_or_dtype):
            try:
                # only the attribute access matters: failure to read
                # ``tzinfo`` selects the plain DateTime type
                arr_or_dtype.tzinfo
                return DateTime(timezone=True)
            except Exception:
                # narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed
                return DateTime
        if com.is_timedelta64_dtype(arr_or_dtype):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(arr_or_dtype):
            return Float
        elif com.is_integer_dtype(arr_or_dtype):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(arr_or_dtype):
            return Boolean
        return Text
Example #11
0
def _isfinite(values):
    """
    Return a mask of the entries of ``values`` that are NOT finite.

    Datetime/timedelta input is delegated to ``isnull``. Complex, float,
    integer and bool input is checked directly with ``np.isfinite``;
    anything else is cast to float64 first.
    """
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    native_checks = (is_complex_dtype, is_float_dtype,
                     is_integer_dtype, is_bool_dtype)
    if any(check(values) for check in native_checks):
        candidate = values
    else:
        candidate = values.astype('float64')
    return ~np.isfinite(candidate)
Example #12
0
    def convert(values, unit, axis):
        """
        Convert date-like input to float ordinals.

        Datetimes and dates go through ``_dt_to_float_ordinal``; times go
        through ``dates.date2num``; integers and floats (scalar or array)
        are returned as-is; strings and list-likes are parsed with
        ``tools.to_datetime`` on a best-effort basis. ``unit`` and
        ``axis`` are not used here.
        """
        def parse_or_passthrough(text):
            try:
                return _dt_to_float_ordinal(tools.to_datetime(text))
            except Exception:
                return text

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        if isinstance(values, pydt.time):
            return dates.date2num(values)
        if com.is_integer(values) or com.is_float(values):
            return values
        if isinstance(values, compat.string_types):
            return parse_or_passthrough(values)
        if not isinstance(values, (list, tuple, np.ndarray)):
            return values

        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(item) for item in values]
        except Exception:
            # best effort: return whatever was obtained before the failure
            pass

        return values
Example #13
0
    def _convert_listlike(arg, box, unit):
        """
        Convert a list-like ``arg`` to timedelta64[ns] values.

        Parameters
        ----------
        arg : list, tuple, iterable or array
        box : boolean
            If True, wrap the result in a ``TimedeltaIndex``.
        unit : str
            Unit applied to integer input and passed to the parsers.

        Returns
        -------
        value : ndarray, or TimedeltaIndex when ``box`` is true
        """

        if isinstance(arg, (list, tuple)) or ((hasattr(arg, '__iter__')
                                               and not hasattr(arg, 'dtype'))):
            arg = np.array(list(arg), dtype='O')

        if is_timedelta64_dtype(arg):
            value = arg.astype('timedelta64[ns]')
        elif is_integer_dtype(arg):

            # these are shortcutable
            value = arg.astype(
                'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
        else:
            # ``except Exception`` replaces bare ``except`` below so that
            # KeyboardInterrupt / SystemExit are not turned into fallbacks
            try:
                value = tslib.array_to_timedelta64(_ensure_object(arg),
                                                   unit=unit)
            except Exception:

                # try to process strings fast; may need to fallback
                try:
                    value = np.array(
                        [_get_string_converter(r, unit=unit)() for r in arg],
                        dtype='m8[ns]')
                except Exception:
                    value = np.array([
                        _coerce_scalar_to_timedelta_type(r, unit=unit)
                        for r in arg
                    ])

        if box:
            from pandas import TimedeltaIndex
            value = TimedeltaIndex(value, unit='ns')
        return value
Example #14
0
 def _maybe_convert_timedelta(self, other):
     """
     Express ``other`` as an integer number of ``self.freq`` steps.

     Timedelta-like scalars and timedelta64 arrays are accepted only when
     the frequency is a Tick and the input is an exact multiple of it.
     A DateOffset is accepted when its base alias equals
     ``self.freq.rule_code`` (its ``n`` is returned). Integer dtype arrays
     are returned unchanged.

     Raises
     ------
     ValueError
         If no conversion applies.
     """
     if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = frequencies.get_standard_freq(other)
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
     elif isinstance(other, np.ndarray):
         if com.is_integer_dtype(other):
             return other
         elif com.is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 # compare element-wise first, then require all to hold;
                 # `(...).all() == 0` let partially-aligned arrays through
                 if (nanos % offset_nanos == 0).all():
                     return nanos // offset_nanos
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise ValueError(msg.format(self.freqstr))
Example #15
0
    def test_nanmean_overflow(self):
        """Series.mean() on large int64 values matches numpy and is float64;
        the result dtype is float64 for ints and unchanged for floats."""
        # GH 10155
        # In the previous implementation mean can overflow for int dtypes, it
        # is now consistent with numpy
        from pandas import Series
        from distutils.version import LooseVersion

        # numpy < 1.9.0 is not computing this correctly
        numpy_is_recent = LooseVersion(np.__version__) >= '1.9.0'
        if numpy_is_recent:
            for a in (2**55, -2**55, 20150515061816532):
                s = Series(a, index=range(500), dtype=np.int64)
                result = s.mean()
                np_result = s.values.mean()
                self.assertEqual(result, a)
                self.assertEqual(result, np_result)
                self.assertTrue(result.dtype == np.float64)

        # check returned dtype
        dtypes = (np.int16, np.int32, np.int64,
                  np.float16, np.float32, np.float64)
        for dtype in dtypes:
            result = Series(range(10), dtype=dtype).mean()
            expected = np.float64 if is_integer_dtype(dtype) else dtype
            self.assertTrue(result.dtype == expected)
Example #16
0
    def test_nanmean_overflow(self):
        """Check that the mean of large int64 Series does not overflow and
        that the returned dtype follows the input dtype family."""
        # GH 10155
        # In the previous implementation mean can overflow for int dtypes, it
        # is now consistent with numpy
        from pandas import Series
        from distutils.version import LooseVersion

        def check_large_constant(a):
            s = Series(a, index=range(500), dtype=np.int64)
            result = s.mean()
            np_result = s.values.mean()
            self.assertEqual(result, a)
            self.assertEqual(result, np_result)
            self.assertTrue(result.dtype == np.float64)

        # numpy < 1.9.0 is not computing this correctly
        if LooseVersion(np.__version__) >= '1.9.0':
            for a in [2 ** 55, -2 ** 55, 20150515061816532]:
                check_large_constant(a)

        # check returned dtype
        int_dtypes = [np.int16, np.int32, np.int64]
        float_dtypes = [np.float16, np.float32, np.float64]
        for dtype in int_dtypes + float_dtypes:
            s = Series(range(10), dtype=dtype)
            result = s.mean()
            if is_integer_dtype(dtype):
                expected = np.float64
            else:
                expected = dtype
            self.assertTrue(result.dtype == expected)
Example #17
0
def value_counts(values, sort=True, ascending=False):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : array with a ``dtype`` attribute
    sort : boolean, default True
        Call ``sort()`` on the resulting Series
    ascending : boolean, default False
        If False, the sorted result is reversed

    Returns
    -------
    value_counts : Series
    """
    from collections import defaultdict

    if com.is_integer_dtype(values.dtype):
        int_values = com._ensure_int64(values)
        keys, counts = lib.value_count_int64(int_values)
        result = Series(counts, index=keys)
    else:
        counter = defaultdict(int)
        non_null = values[com.notnull(values)]
        for value in non_null:
            counter[value] += 1
        result = Series(counter)

    if sort:
        result.sort()
        result = result if ascending else result[::-1]

    return result
Example #18
0
File: sql.py Project: Jemash/pandas
    def _sqlalchemy_type(self, col):
        """
        Pick the SQLAlchemy column type for ``col``.

        datetime64 -> ``DateTime`` (``DateTime(timezone=True)`` when ``col``
        exposes ``tzinfo``); timedelta64 -> ``BigInteger`` with a
        UserWarning; float -> ``Float``; integer -> ``BigInteger``;
        bool -> ``Boolean``. Otherwise the type is inferred with
        ``lib.infer_dtype``: 'date' -> ``Date``, 'time' -> ``Time``,
        anything else -> ``Text``.
        """
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
            DateTime, Date, Time, Interval)

        if com.is_datetime64_dtype(col):
            try:
                # only probing for the attribute; its value is not used
                col.tzinfo
                return DateTime(timezone=True)
            except Exception:
                # was a bare ``except``, which also trapped
                # KeyboardInterrupt / SystemExit
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Example #19
0
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """
    Format ``values`` with the formatter class matching its dtype.

    Float, integer and datetime64 dtypes use ``FloatArrayFormatter``,
    ``IntArrayFormatter`` and ``Datetime64Formatter``; every other dtype
    uses ``GenericArrayFormatter``. ``space``, ``float_format`` and
    ``digits`` default to the "print.column_space", "print.float_format"
    and "print.precision" options when passed as None.

    Returns
    -------
    The formatter object's ``get_result()``.
    """
    dtype = values.dtype
    if com.is_float_dtype(dtype):
        formatter_cls = FloatArrayFormatter
    elif com.is_integer_dtype(dtype):
        formatter_cls = IntArrayFormatter
    elif com.is_datetime64_dtype(dtype):
        formatter_cls = Datetime64Formatter
    else:
        formatter_cls = GenericArrayFormatter

    # fall back to the configured options for anything left unspecified
    space = get_option("print.column_space") if space is None else space
    if float_format is None:
        float_format = get_option("print.float_format")
    digits = get_option("print.precision") if digits is None else digits

    return formatter_cls(values, digits, na_rep=na_rep,
                         float_format=float_format,
                         formatter=formatter, space=space,
                         justify=justify).get_result()
Example #20
0
    def _sqlalchemy_type(self, col):
        """
        Return the SQLAlchemy type used to store ``col``.

        The dtype checks are tried in order (datetime64, timedelta64,
        float, integer, bool). If none matches, ``lib.infer_dtype`` on the
        object-converted column decides between ``Date``, ``Time`` and the
        ``Text`` fallback. timedelta64 columns emit a UserWarning and map
        to ``BigInteger``.
        """
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Time)

        if com.is_datetime64_dtype(col):
            try:
                # attribute probe only; a failure means "no timezone info"
                col.tzinfo
                return DateTime(timezone=True)
            except Exception:
                # narrowed from bare ``except`` so interpreter-exit
                # exceptions propagate
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Example #21
0
def mode(values):
    """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
    # must sort because hash order isn't necessarily defined.
    from pandas.core.series import Series

    if isinstance(values, Series):
        constructor = values._constructor
        values = values.values
    else:
        values = np.asanyarray(values)
        constructor = Series

    original_dtype = values.dtype

    if com.is_integer_dtype(original_dtype):
        modes = htable.mode_int64(com._ensure_int64(values))
        return constructor(sorted(modes), dtype=original_dtype)

    if issubclass(original_dtype.type, (np.datetime64, np.timedelta64)):
        # computed on the int64 view, rebuilt with the original dtype
        modes = htable.mode_int64(values.view(np.int64))
        return constructor(sorted(modes), dtype=original_dtype)

    null_mask = com.isnull(values)
    modes = htable.mode_object(com._ensure_object(values), null_mask)
    try:
        modes = sorted(modes)
    except TypeError as e:
        # unorderable values: keep the unsorted result and warn
        warn("Unable to sort modes: %s" % e)
    return constructor(modes, dtype=original_dtype)
Example #22
0
 def _maybe_convert_timedelta(self, other):
     """
     Convert ``other`` into an integer count of ``self.freq`` units.

     Parameters
     ----------
     other : timedelta, np.timedelta64, offsets.Tick, Timedelta,
         offsets.DateOffset or np.ndarray

     Returns
     -------
     converted : the integer multiple(s) of the frequency; an integer
         dtype ndarray is returned unchanged

     Raises
     ------
     IncompatibleFrequency
         If a DateOffset has a different base frequency, or if no
         conversion applies.
     """
     if isinstance(other, (timedelta, np.timedelta64,
                           offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = frequencies.get_standard_freq(other)
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
         msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
         raise IncompatibleFrequency(msg)
     elif isinstance(other, np.ndarray):
         if com.is_integer_dtype(other):
             return other
         elif com.is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 # all elements must divide evenly; `(...).all() == 0`
                 # was true as soon as a single remainder was zero
                 if (nanos % offset_nanos == 0).all():
                     return nanos // offset_nanos
     # raise when input doesn't have freq
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise IncompatibleFrequency(msg.format(self.freqstr))
Example #23
0
def backfill_2d(values, limit=None, mask=None, dtype=None):
    """
    Run the dtype-specific 2-d backfill routine on ``values``.

    Parameters
    ----------
    values : ndarray
    limit : optional
        Forwarded to the backfill routine.
    mask : ndarray, optional
        Defaults to ``com.isnull(values)``; viewed as uint8 before use.
    dtype : dtype, optional
        Defaults to ``values.dtype``.

    Returns
    -------
    values : the array that was passed to the routine (integer input is
        converted to float64 first)

    Raises
    ------
    ValueError
        If no backfill routine exists for the dtype.
    """
    if dtype is None:
        dtype = values.dtype

    if com.is_float_dtype(values):
        # routine is looked up by dtype name and may be missing
        filler = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        filler = _backfill_2d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        filler = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.backfill_2d_inplace_object
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    null_mask = com.isnull(values) if mask is None else mask
    null_mask = null_mask.view(np.uint8)

    # skip the routine when any dimension has length zero
    if np.all(values.shape):
        filler(values, null_mask, limit=limit)
    return values
Example #24
0
    def _convert_listlike(arg, box, unit):
        """
        Convert a list-like ``arg`` to timedelta64[ns] values.

        Lists, tuples and dtype-less iterables are first turned into an
        object array. timedelta64 and integer input is cast directly;
        anything else is parsed, falling back from the array parser to
        per-element string conversion and then to per-element coercion.
        ``coerce`` is taken from the enclosing scope. When ``box`` is
        true the result is wrapped in a ``TimedeltaIndex``.
        """

        if isinstance(arg, (list,tuple)) or ((hasattr(arg,'__iter__') and not hasattr(arg,'dtype'))):
            arg = np.array(list(arg), dtype='O')

        if is_timedelta64_dtype(arg):
            value = arg.astype('timedelta64[ns]')
        elif is_integer_dtype(arg):

            # these are shortcutable
            value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
        else:
            # ``except Exception`` (not bare ``except``) so that
            # KeyboardInterrupt / SystemExit are not swallowed
            try:
                value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce)
            except Exception:

                # try to process strings fast; may need to fallback
                try:
                    value = np.array([ _get_string_converter(r, unit=unit)() for r in arg ],dtype='m8[ns]')
                except Exception:
                    value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit, coerce=coerce) for r in arg ])
            value = value.astype('timedelta64[ns]', copy=False)

        if box:
            from pandas import TimedeltaIndex
            value = TimedeltaIndex(value,unit='ns')
        return value
Example #25
0
def _isfinite(values):
    """
    Boolean mask that is True where ``values`` is not finite.

    For datetime/timedelta dtypes the mask is ``isnull(values)``. Other
    dtypes use the negation of ``np.isfinite``, after a cast to float64
    unless the dtype is complex, float, integer or bool.
    """
    if _is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    is_numeric_like = (is_complex_dtype(values) or is_float_dtype(values) or
                       is_integer_dtype(values) or is_bool_dtype(values))
    if not is_numeric_like:
        values = values.astype('float64')
    return ~np.isfinite(values)
Example #26
0
 def _maybe_convert_timedelta(self, other):
     """
     Express ``other`` as an integer number of ``self.freq`` steps.

     Timedelta-like scalars and timedelta64 arrays qualify only if the
     frequency is a Tick and the input is an exact multiple of it.
     A DateOffset qualifies when its base alias equals
     ``self.freq.rule_code``; otherwise ``IncompatibleFrequency`` is
     raised immediately. Integer dtype arrays are returned unchanged.

     Raises
     ------
     IncompatibleFrequency
         If the input cannot be converted.
     """
     if isinstance(other,
                   (timedelta, np.timedelta64, offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = frequencies.get_standard_freq(other)
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
         msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
         raise IncompatibleFrequency(msg)
     elif isinstance(other, np.ndarray):
         if com.is_integer_dtype(other):
             return other
         elif com.is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 # test each remainder against zero before reducing; the
                 # old `(...).all() == 0` accepted partially-aligned input
                 if (nanos % offset_nanos == 0).all():
                     return nanos // offset_nanos
     # raise when input doesn't have freq
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise IncompatibleFrequency(msg.format(self.freqstr))
Example #27
0
    def _delegate_property_get(self, name):
        """
        Read attribute ``name`` from ``self.values`` and return it.

        Scalars (non list-like results) are returned directly. Array
        results are upcast to int64 when integer, re-expanded through
        ``self.orig.cat.codes`` when ``self.orig`` is set, and wrapped in a
        Series indexed like ``self`` whose ``is_copy`` message warns that
        modifications are discarded.
        """
        from pandas import Series

        attr_value = getattr(self.values, name)

        if isinstance(attr_value, np.ndarray):
            # maybe need to upcast (ints)
            if is_integer_dtype(attr_value):
                attr_value = attr_value.astype('int64')
        elif not is_list_like(attr_value):
            return attr_value

        # blow up if we operate on categories
        if self.orig is not None:
            attr_value = take_1d(attr_value, self.orig.cat.codes)

        # return the result as a Series, which is by definition a copy
        wrapped = Series(attr_value, index=self.index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        wrapped.is_copy = ("modifications to a property of a datetimelike "
                           "object are not supported and are discarded. "
                           "Change values on the original.")
        return wrapped
Example #28
0
    def _convert_listlike(arg, box, unit):
        """
        Convert a list-like ``arg`` to timedelta64[ns] values.

        Lists and tuples become an object array first. timedelta64 input
        is cast directly, integer input is interpreted in the validated
        ``unit``, and anything else is parsed by
        ``tslib.array_to_timedelta64`` with a per-element fallback.
        When ``box`` is true the result is returned as an 'm8[ns]' Series.
        """

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        if is_timedelta64_dtype(arg):
            value = arg.astype('timedelta64[ns]')
        elif is_integer_dtype(arg):
            unit = _validate_timedelta_unit(unit)

            # these are shortcutable
            value = arg.astype(
                'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
        else:
            try:
                value = tslib.array_to_timedelta64(_ensure_object(arg),
                                                   unit=unit)
            except Exception:
                # was a bare ``except``; KeyboardInterrupt / SystemExit
                # now propagate instead of triggering the slow path
                value = np.array([
                    _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg
                ])

        if box:
            from pandas import Series
            value = Series(value, dtype='m8[ns]')
        return value
Example #29
0
    def restore_type(self, dtype, sample=None):
        """Restore type from Pandas

        The pandas dtype is checked first (bool, datetime, integer, then
        any numeric). If none matches and a ``sample`` value is given, its
        Python type decides. The fallback is 'string'.
        """

        # Pandas types (order matters: integer must precede numeric)
        dtype_checks = (
            (pdc.is_bool_dtype, 'boolean'),
            (pdc.is_datetime64_any_dtype, 'datetime'),
            (pdc.is_integer_dtype, 'integer'),
            (pdc.is_numeric_dtype, 'number'),
        )
        for matches, label in dtype_checks:
            if matches(dtype):
                return label

        # Python types
        if sample is None:
            return 'string'
        if isinstance(sample, (list, tuple)):
            return 'array'
        if isinstance(sample, datetime.date):
            return 'date'
        if isinstance(sample, isodate.Duration):
            return 'duration'
        if isinstance(sample, dict):
            return 'object'
        if isinstance(sample, six.string_types):
            return 'string'
        if isinstance(sample, datetime.time):
            return 'time'

        return 'string'
Example #30
0
def mode(values):
    """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
    # must sort because hash order isn't necessarily defined.
    from pandas.core.series import Series

    is_series = isinstance(values, Series)
    if is_series:
        constructor = values._constructor
        values = values.values
    else:
        values = np.asanyarray(values)
        constructor = Series

    dtype = values.dtype

    if com.is_integer_dtype(values):
        as_int = com._ensure_int64(values)
        return constructor(sorted(htable.mode_int64(as_int)), dtype=dtype)

    if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
        # work on the int64 view, then restore the original dtype
        as_int = values.view(np.int64)
        return constructor(sorted(htable.mode_int64(as_int)), dtype=dtype)

    if com.is_categorical_dtype(values):
        # the values object computes its own mode
        return constructor(values.mode())

    mask = com.isnull(values)
    res = htable.mode_object(com._ensure_object(values), mask)
    try:
        res = sorted(res)
    except TypeError as e:
        warn("Unable to sort modes: %s" % e)
    return constructor(res, dtype=dtype)
Example #31
0
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """
    Format ``values`` using the array formatter that matches its dtype.

    ``FloatArrayFormatter``, ``IntArrayFormatter`` and
    ``Datetime64Formatter`` handle float, integer and datetime64 dtypes;
    ``GenericArrayFormatter`` handles the rest. Arguments left as None
    (``space``, ``float_format``, ``digits``) are read from
    ``print_config``.

    Returns
    -------
    The result of the formatter's ``get_result()``.
    """
    dtype = values.dtype
    if com.is_float_dtype(dtype):
        chosen = FloatArrayFormatter
    elif com.is_integer_dtype(dtype):
        chosen = IntArrayFormatter
    elif com.is_datetime64_dtype(dtype):
        chosen = Datetime64Formatter
    else:
        chosen = GenericArrayFormatter

    # unspecified display settings come from the print configuration
    if space is None:
        space = print_config.column_space
    if float_format is None:
        float_format = print_config.float_format
    if digits is None:
        digits = print_config.precision

    options = dict(na_rep=na_rep, float_format=float_format,
                   formatter=formatter, space=space, justify=justify)
    return chosen(values, digits, **options).get_result()
Example #32
0
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value : NaT for datetime64 / datetime64tz /
        timedelta64, NaN for float, 0 for integer, False for bool and
        NaN for everything else
    """

    from pandas.core import common as com
    from pandas import NaT
    dtype = pandas_dtype(dtype)

    is_datetimelike = (com.is_datetime64_dtype(dtype) or
                       com.is_datetime64tz_dtype(dtype) or
                       com.is_timedelta64_dtype(dtype))
    if is_datetimelike:
        return NaT
    if com.is_float_dtype(dtype):
        return np.nan
    if com.is_integer_dtype(dtype):
        return 0
    if com.is_bool_dtype(dtype):
        return False
    return np.nan
Example #33
0
def value_counts(values, sort=True, ascending=False):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : object exposing ``dtype`` and supporting boolean indexing
    sort : boolean, default True
        Sort the result with ``Series.sort()``
    ascending : boolean, default False
        When False the sorted result is reversed

    Returns
    -------
    value_counts : Series
    """
    from collections import defaultdict

    if not com.is_integer_dtype(values.dtype):
        # Generic path: drop nulls, then tally each remaining value.
        tally = defaultdict(int)
        for item in values[com.notnull(values)]:
            tally[item] += 1
        result = Series(tally)
    else:
        # Integer path: counting is delegated to the int64 routine.
        keys, counts = lib.value_count_int64(com._ensure_int64(values))
        result = Series(counts, index=keys)

    if not sort:
        return result

    result.sort()
    return result if ascending else result[::-1]
Example #34
0
    def convert(values, unit, axis):
        """
        Convert date-like input through ``_dt_to_float_ordinal``.

        Parameters
        ----------
        values : datetime/date, time, number, string, or list/tuple/ndarray
        unit, axis : unused by this implementation

        Returns
        -------
        The converted value(s). Numeric scalars and integer/float arrays
        are returned unchanged, and input that cannot be parsed as
        datetimes is returned as it stands (conversion is best effort).
        """
        def try_parse(values):
            # Best effort: fall back to the raw input when parsing fails.
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif (com.is_integer(values) or com.is_float(values)):
            return values
        elif isinstance(values, basestring):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                # The tuple-safe array helper lives on ``com``; NumPy has
                # no ``_asarray_tuplesafe`` attribute, so the previous
                # ``np.`` spelling raised AttributeError for lists/tuples.
                values = com._asarray_tuplesafe(values)

            if com.is_integer_dtype(values) or com.is_float_dtype(values):
                return values

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                # Deliberate best effort: leave values as they are when
                # they cannot be interpreted as datetimes.
                pass

        return values
Example #35
0
    def _delegate_property_get(self, name):
        from pandas import Series

        result = getattr(self.values, name)

        # maybe need to upcast (ints)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype('int64')
        elif not is_list_like(result):
            return result

        # blow up if we operate on categories
        if self.orig is not None:
            result = take_1d(result, self.orig.cat.codes)

        # return the result as a Series, which is by definition a copy
        result = Series(result, index=self.index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        result.is_copy = ("modifications to a property of a datetimelike "
                          "object are not supported and are discarded. "
                          "Change values on the original.")

        return result
Example #36
0
    def _sqlalchemy_type(self, arr_or_dtype):
        """
        Map an array or dtype to a SQLAlchemy column type.

        Returns ``Date`` for the ``date`` class itself, ``DateTime`` for
        datetime64 input (``DateTime(timezone=True)`` when the input exposes
        a ``tzinfo`` attribute), ``BigInteger`` for timedelta64 (with a
        UserWarning) and integer input, ``Float`` for floats, ``Boolean``
        for booleans and ``Text`` for everything else.
        """
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
            DateTime, Date, Interval)

        if arr_or_dtype is date:
            return Date
        if com.is_datetime64_dtype(arr_or_dtype):
            try:
                # Only the presence of ``tzinfo`` matters; its value is
                # not inspected.
                arr_or_dtype.tzinfo
            except AttributeError:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                return DateTime
            return DateTime(timezone=True)
        if com.is_timedelta64_dtype(arr_or_dtype):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(arr_or_dtype):
            return Float
        elif com.is_integer_dtype(arr_or_dtype):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(arr_or_dtype):
            return Boolean
        return Text
Example #37
0
def value_counts(values, sort=True, ascending=False):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order

    Returns
    -------
    value_counts : Series
    """
    from pandas.core.series import Series
    from collections import defaultdict

    if com.is_integer_dtype(values.dtype):
        # Integer input is counted by the dedicated int64 routine.
        keys, counts = lib.value_count_int64(com._ensure_int64(values))
        result = Series(counts, index=keys)
    else:
        # Everything else: discard nulls and tally what remains.
        occurrences = defaultdict(int)
        non_null = values[com.notnull(values)]
        for entry in non_null:
            occurrences[entry] += 1
        result = Series(occurrences)

    if sort:
        result.sort()
        # sort() orders ascending; flip for the default descending order.
        result = result if ascending else result[::-1]

    return result
Example #38
0
 def astype(self, dtype, copy=True):
     """
     Cast to object (returns ``self.asobject``) or to an integer dtype
     (returns an 'i8' Index built from ``self.values``).

     Raises
     ------
     ValueError
         For any other target dtype.
     """
     target = np.dtype(dtype)
     if is_object_dtype(target):
         return self.asobject
     if is_integer_dtype(target):
         # Integer targets always produce 'i8' data.
         as_ints = self.values.astype('i8', copy=copy)
         return Index(as_ints, name=self.name, dtype='i8')
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % target)
Example #39
0
    def coerce(values):
        """Convert ``values`` with ``to_numeric``, widening integers to int64."""
        # ``errors`` comes from the enclosing scope and controls how
        # to_numeric treats values it cannot convert.
        converted = to_numeric(values, errors=errors)

        if not com.is_integer_dtype(converted):
            return converted
        # prevent overflow in case of int8 or int16
        return converted.astype('int64', copy=False)
 def get_expected(s, name):
     """
     Build the expected value of property ``prop`` (taken from the
     enclosing scope) by reading it from ``Index(s._values)``.

     Non-list-like results are returned as they are; anything else is
     wrapped in a Series sharing ``s``'s index and name, with integer
     ndarrays cast to int64 first. ``name`` is not used.
     """
     expected = getattr(Index(s._values), prop)
     if not isinstance(expected, np.ndarray):
         if not com.is_list_like(expected):
             return expected
     elif com.is_integer_dtype(expected):
         expected = expected.astype('int64')
     return Series(expected, index=s.index, name=s.name)
Example #41
0
    def coerce(values):
        """Run ``to_numeric`` on ``values`` and upcast integer results to int64."""
        # How unconvertible values are treated is decided by ``errors``
        # from the enclosing scope.
        numeric = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        is_integral = com.is_integer_dtype(numeric)
        return numeric.astype('int64', copy=False) if is_integral else numeric
Example #42
0
 def get_expected(s, name):
     """
     Compute the expected result for property ``prop`` (from the enclosing
     scope) as read from ``Index(s._values)``.

     Integer ndarrays are cast to int64, non-list-like values are returned
     directly, and everything else is wrapped in a Series that reuses
     ``s``'s index and name. ``name`` is not used.
     """
     value = getattr(Index(s._values), prop)
     is_array = isinstance(value, np.ndarray)
     if is_array and com.is_integer_dtype(value):
         value = value.astype('int64')
     if not is_array and not com.is_list_like(value):
         return value
     return Series(value, index=s.index, name=s.name)
Example #43
0
 def astype(self, dtype, copy=True):
     """
     Cast to object or integer dtype.

     Object targets return ``self.asobject``; integer targets return an
     Index of 'i8' values carrying ``self.name``. Any other dtype raises
     ValueError.
     """
     requested = np.dtype(dtype)
     if is_object_dtype(requested):
         return self.asobject
     if not is_integer_dtype(requested):
         raise ValueError('Cannot cast PeriodIndex to dtype %s' % requested)
     # Integer targets always produce 'i8' data.
     ordinals = self.values.astype('i8', copy=copy)
     return Index(ordinals, name=self.name, dtype='i8')
Example #44
0
 def test_delevel_infer_dtype(self):
     # reset_index() must keep the integer / float dtype of the index
     # levels it turns back into columns.
     # list() replaces a comprehension whose loop variable shadowed the
     # ``tuple`` builtin.
     tuples = list(cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1]))
     index = MultiIndex.from_tuples(tuples,
                                    names=['prm0', 'prm1', 'prm2'])
     df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                    index=index)
     deleveled = df.reset_index()
     # assertTrue is the supported spelling of the deprecated ``assert_``.
     self.assertTrue(com.is_integer_dtype(deleveled['prm1']))
     self.assertTrue(com.is_float_dtype(deleveled['prm2']))
Example #45
0
 def test_delevel_infer_dtype(self):
     # delevel() must keep the integer / float dtype of the index levels
     # it turns back into columns.
     # list() replaces a comprehension whose loop variable shadowed the
     # ``tuple`` builtin.
     tuples = list(cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1]))
     index = MultiIndex.from_tuples(tuples,
                                    names=['prm0', 'prm1', 'prm2'])
     df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                    index=index)
     deleveled = df.delevel()
     # assertTrue is the supported spelling of the deprecated ``assert_``.
     self.assertTrue(com.is_integer_dtype(deleveled['prm1']))
     self.assertTrue(com.is_float_dtype(deleveled['prm2']))
Example #46
0
def count(values, uniques=None):
    """
    Count ``values`` via ``_count_generic`` with a dtype-specific hash table.

    Float input uses the float64 table, integer input the int64 table and
    anything else the Python-object table, each paired with its matching
    ``_ensure_*`` caster.

    Raises
    ------
    NotImplementedError
        If ``uniques`` is supplied.
    """
    if uniques is not None:
        raise NotImplementedError

    if com.is_float_dtype(values):
        table, caster = lib.Float64HashTable, _ensure_float64
    elif com.is_integer_dtype(values):
        table, caster = lib.Int64HashTable, _ensure_int64
    else:
        table, caster = lib.PyObjectHashTable, _ensure_object
    return _count_generic(values, table, caster)
Example #47
0
def _hashtable_algo(f, dtype):
    """
    Call ``f`` with the hash table class and caster that match ``dtype``.

    f(HashTable, type_caster) -> result
    """
    if com.is_float_dtype(dtype):
        table, caster = htable.Float64HashTable, com._ensure_float64
    elif com.is_integer_dtype(dtype):
        table, caster = htable.Int64HashTable, com._ensure_int64
    else:
        # every other dtype goes through the Python-object table
        table, caster = htable.PyObjectHashTable, com._ensure_object
    return f(table, caster)
Example #48
0
def _hashtable_algo(f, dtype):
    """
    Dispatch ``f`` on ``dtype``: float, integer, or generic object.

    f(HashTable, type_caster) -> result
    """
    if com.is_float_dtype(dtype):
        chosen = (htable.Float64HashTable, com._ensure_float64)
    elif com.is_integer_dtype(dtype):
        chosen = (htable.Int64HashTable, com._ensure_int64)
    else:
        # fallback for dtypes that are neither float nor integer
        chosen = (htable.PyObjectHashTable, com._ensure_object)
    return f(*chosen)
Example #49
0
    def _wrap_access_object(self, obj):
        """
        Normalise an accessor result before handing it back.

        Integer-dtype results are cast to int64. When ``self`` is a Series
        the result is rebuilt with ``self._constructor`` on ``self.index``
        and finalized against ``self``; otherwise it is returned directly.
        """
        # we don't want non-int64 output when the result is integral
        needs_widening = hasattr(obj, 'dtype') and com.is_integer_dtype(obj)
        if needs_widening:
            obj = obj.astype(np.int64)

        if not isinstance(self, com.ABCSeries):
            return obj

        wrapped = self._constructor(obj, index=self.index)
        return wrapped.__finalize__(self)
    def _wrap_access_object(self, obj):
        """
        Post-process an accessor result.

        Results carrying an integer dtype are converted to int64. If
        ``self`` is a Series, the result is wrapped through
        ``self._constructor`` with ``self.index`` and finalized against
        ``self``; any other ``self`` gets the (possibly converted) object.
        """
        # integer results are always reported as int64
        if hasattr(obj, 'dtype'):
            if com.is_integer_dtype(obj):
                obj = obj.astype(np.int64)

        is_series = isinstance(self, com.ABCSeries)
        if is_series:
            return self._constructor(obj, index=self.index).__finalize__(self)
        return obj
Example #51
0
def _value_counts_arraylike(values, dropna=True):
    """
    Count occurrences of each distinct value in an array-like.

    Parameters
    ----------
    values : array-like
        Coerced through ``Series(values).values`` before counting.
    dropna : boolean, default True
        Passed to the scalar counting routine. For datetime-like and
        period input, keys equal to ``iNaT`` are also filtered out when
        True. For object input, a NaN key holding the null count is
        prepended when False and nulls are present.

    Returns
    -------
    (keys, counts) : tuple
        ``keys`` is converted back to the incoming dtype for datetime-like
        input and re-wrapped as DatetimeIndex / PeriodIndex for tz-aware
        and period input respectively.
    """
    # Record tz-awareness / period-ness up front: ``values`` is rebound
    # below and the original object is needed again to recover tz.
    is_datetimetz = com.is_datetimetz(values)
    is_period = (isinstance(values, gt.ABCPeriodIndex)
                 or com.is_period_arraylike(values))

    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        # Count on the int64 view of the data.
        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            # Remove the iNaT key (and its count) from the result.
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz:
            # The tz lives on the index itself, or on the ``.dt`` accessor
            # for any other tz-aware input.
            if isinstance(orig, gt.ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif com.is_integer_dtype(dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    elif com.is_float_dtype(dtype):
        values = com._ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    else:
        values = com._ensure_object(values)
        mask = com.isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            # Report nulls as a single leading NaN key with their total.
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
Example #52
0
def _get_hash_table_and_cast(values):
    """
    Choose the hash table class for ``values`` and cast ``values`` to match.

    Returns
    -------
    (klass, values) : the float64, int64 or Python-object hash table class
    together with ``values`` passed through the corresponding ``_ensure_*``
    caster.
    """
    if com.is_float_dtype(values):
        return lib.Float64HashTable, com._ensure_float64(values)
    if com.is_integer_dtype(values):
        return lib.Int64HashTable, com._ensure_int64(values)
    # neither float nor integer: use the generic object table
    return lib.PyObjectHashTable, com._ensure_object(values)
Example #53
0
def _get_data_algo(values, func_map):
    """
    Look up the dtype-specific function for ``values`` and cast ``values``.

    ``func_map`` is indexed with 'float64', 'int64' or 'generic' depending
    on whether ``values`` is float, integer or anything else.

    Returns
    -------
    (f, values) : the selected function and the cast values.
    """
    if com.is_float_dtype(values):
        key, caster = 'float64', com._ensure_float64
    elif com.is_integer_dtype(values):
        key, caster = 'int64', com._ensure_int64
    else:
        key, caster = 'generic', com._ensure_object
    # The lookup happens before the cast, as the tuple is built left to right.
    return func_map[key], caster(values)
Example #54
0
def _get_data_algo(values, func_map):
    """
    Select an implementation from ``func_map`` based on the dtype of
    ``values`` and return it with ``values`` cast to the matching type.

    Float input uses the 'float64' entry, integer input the 'int64' entry
    and all other input the 'generic' entry.
    """
    if com.is_float_dtype(values):
        return func_map['float64'], com._ensure_float64(values)
    if com.is_integer_dtype(values):
        return func_map['int64'], com._ensure_int64(values)
    # object fallback
    return func_map['generic'], com._ensure_object(values)
Example #55
0
 def astype(self, dtype):
     """
     Return an Index of ``self._values`` converted to ``dtype``.

     Float and integer targets cast the underlying values; object targets
     reuse them unchanged.

     Raises
     ------
     TypeError
         For any other target dtype.
     """
     target = pandas_dtype(dtype)
     numeric = is_float_dtype(target) or is_integer_dtype(target)
     if not numeric and not is_object_dtype(target):
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     # Only numeric targets need an actual cast of the data.
     data = self._values.astype(target) if numeric else self._values
     return Index(data, name=self.name, dtype=target)
Example #56
0
 def astype(self, dtype):
     """
     Convert to an Index of the requested dtype.

     Float or integer dtypes cast ``self._values``; object dtype passes
     ``self._values`` through as-is. Every other dtype raises TypeError.
     """
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype) or is_integer_dtype(dtype):
         return Index(self._values.astype(dtype), name=self.name,
                      dtype=dtype)
     if is_object_dtype(dtype):
         # no cast needed; Index applies the object dtype itself
         return Index(self._values, name=self.name, dtype=dtype)
     raise TypeError('Setting %s dtype to anything other than '
                     'float64 or object is not supported' %
                     self.__class__)
Example #57
0
def _get_hash_table_and_cast(values):
    """
    Return the hash table class suited to ``values`` plus the cast values.

    Float input maps to the float64 table, integer input to the int64
    table, and everything else to the Python-object table; ``values`` is
    passed through the matching ``_ensure_*`` caster.
    """
    if com.is_float_dtype(values):
        klass, caster = lib.Float64HashTable, com._ensure_float64
    elif com.is_integer_dtype(values):
        klass, caster = lib.Int64HashTable, com._ensure_int64
    else:
        klass, caster = lib.PyObjectHashTable, com._ensure_object
    return klass, caster(values)