예제 #1
0
    def wrapper(self, other):
        is_self_int_dtype = is_integer_dtype(self.dtype)

        fill_int = lambda x: x.fillna(0)
        fill_bool = lambda x: x.fillna(False).astype(bool)

        if isinstance(other, ABCSeries):
            name = _maybe_match_name(self, other)
            other = other.reindex_like(self)
            is_other_int_dtype = is_integer_dtype(other.dtype)
            other = fill_int(other) if is_other_int_dtype else fill_bool(other)

            filler = (fill_int if is_self_int_dtype and is_other_int_dtype
                      else fill_bool)
            return filler(self._constructor(na_op(self.values, other.values),
                                            index=self.index, name=name))

        elif isinstance(other, pd.DataFrame):
            return NotImplemented

        else:
            # scalars, list, tuple, np.array
            filler = (fill_int if is_self_int_dtype and
                      is_integer_dtype(np.asarray(other)) else fill_bool)
            return filler(self._constructor(
                na_op(self.values, other),
                index=self.index)).__finalize__(self)
예제 #2
0
def coerce_dtypes(df, dtypes):
    """ Coerce dataframe to dtypes safely

    Operates in place

    Parameters
    ----------
    df: Pandas DataFrame
    dtypes: dict like {'x': float}
    """
    for c in df.columns:
        if c in dtypes and df.dtypes[c] != dtypes[c]:
            if is_float_dtype(df.dtypes[c]) and is_integer_dtype(dtypes[c]):
                # There is a mismatch between floating and integer columns.
                # Determine all mismatched and error.
                mismatched = sorted(c for c in df.columns
                                    if is_float_dtype(df.dtypes[c])
                                    and is_integer_dtype(dtypes[c]))

                msg = ("Mismatched dtypes found.\n"
                       "Expected integers, but found floats for columns:\n"
                       "%s\n\n"
                       "To fix, specify dtypes manually by adding:\n\n"
                       "%s\n\n"
                       "to the call to `read_csv`/`read_table`.\n\n"
                       "Alternatively, provide `assume_missing=True` to "
                       "interpret all unspecified integer columns as floats.")

                missing_list = '\n'.join('- %r' % c for c in mismatched)
                dtype_list = ('%r: float' % c for c in mismatched)
                missing_dict = 'dtype={%s}' % ',\n       '.join(dtype_list)
                raise ValueError(msg % (missing_list, missing_dict))

            df[c] = df[c].astype(dtypes[c])
예제 #3
0
파일: csv.py 프로젝트: wikiped/dask
def coerce_dtypes(df, dtypes):
    """ Coerce dataframe to dtypes safely

    Operates in place

    Parameters
    ----------
    df: Pandas DataFrame
    dtypes: dict like {'x': float}
    """
    for c in df.columns:
        if c in dtypes and df.dtypes[c] != dtypes[c]:
            if is_float_dtype(df.dtypes[c]) and is_integer_dtype(dtypes[c]):
                # There is a mismatch between floating and integer columns.
                # Determine all mismatched and error.
                mismatched = sorted(c for c in df.columns if
                                    is_float_dtype(df.dtypes[c]) and
                                    is_integer_dtype(dtypes[c]))

                msg = ("Mismatched dtypes found.\n"
                       "Expected integers, but found floats for columns:\n"
                       "%s\n\n"
                       "To fix, specify dtypes manually by adding:\n\n"
                       "%s\n\n"
                       "to the call to `read_csv`/`read_table`.\n\n"
                       "Alternatively, provide `assume_missing=True` to "
                       "interpret all unspecified integer columns as floats.")

                missing_list = '\n'.join('- %r' % c for c in mismatched)
                dtype_list = ('%r: float' % c for c in mismatched)
                missing_dict = 'dtype={%s}' % ',\n       '.join(dtype_list)
                raise ValueError(msg % (missing_list, missing_dict))

            df[c] = df[c].astype(dtypes[c])
예제 #4
0
def _sparse_array_op(left, right, op, name, series=False):

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    dtype = _maybe_match_dtype(left, right)
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name, dtype=dtype)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
예제 #5
0
    def _delegate_property_get(self, name):
        from pandas import Series

        result = getattr(self.values, name)

        # maybe need to upcast (ints)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype('int64')
        elif not is_list_like(result):
            return result

        result = np.asarray(result)

        # blow up if we operate on categories
        if self.orig is not None:
            result = take_1d(result, self.orig.cat.codes)

        # return the result as a Series, which is by definition a copy
        result = Series(result, index=self.index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        result.is_copy = ("modifications to a property of a datetimelike "
                          "object are not supported and are discarded. "
                          "Change values on the original.")

        return result
예제 #6
0
 def _maybe_convert_timedelta(self, other):
     if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = other.rule_code
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
         msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
         raise IncompatibleFrequency(msg)
     elif isinstance(other, np.ndarray):
         if is_integer_dtype(other):
             return other
         elif is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 if (nanos % offset_nanos).all() == 0:
                     return nanos // offset_nanos
     elif is_integer(other):
         # integer is passed to .shift via
         # _add_datetimelike_methods basically
         # but ufunc may pass integer to _add_delta
         return other
     # raise when input doesn't have freq
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise IncompatibleFrequency(msg.format(self.freqstr))
예제 #7
0
def _get_data_algo(values, func_map):

    f = None
    if is_float_dtype(values):
        f = func_map['float64']
        values = _ensure_float64(values)

    elif needs_i8_conversion(values):
        f = func_map['int64']
        values = values.view('i8')

    elif is_integer_dtype(values):
        f = func_map['int64']
        values = _ensure_int64(values)
    else:

        values = _ensure_object(values)

        # its cheaper to use a String Hash Table than Object
        if lib.infer_dtype(values) in ['string']:
            try:
                f = func_map['string']
            except KeyError:
                pass

    if f is None:
        f = func_map['generic']

    return f, values
예제 #8
0
def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
    """Convert a list of objects to a timedelta index object."""

    if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
        arg = np.array(list(arg), dtype='O')

    # these are shortcut-able
    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        value = arg.astype('timedelta64[{0}]'.format(unit)).astype(
            'timedelta64[ns]', copy=False)
    else:
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit,
                                               errors=errors)
            value = value.astype('timedelta64[ns]', copy=False)
        except ValueError:
            if errors == 'ignore':
                return arg
            else:
                # This else-block accounts for the cases when errors='raise'
                # and errors='coerce'. If errors == 'raise', these errors
                # should be raised. If errors == 'coerce', we shouldn't
                # expect any errors to be raised, since all parsing errors
                # cause coercion to pd.NaT. However, if an error / bug is
                # introduced that causes an Exception to be raised, we would
                # like to surface it.
                raise

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value, unit='ns', name=name)
    return value
예제 #9
0
def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values)
            or is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
예제 #10
0
    def _delegate_property_get(self, name):
        from pandas import Series

        result = getattr(self.values, name)

        # maybe need to upcast (ints)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype('int64')
        elif not is_list_like(result):
            return result

        # blow up if we operate on categories
        if self.orig is not None:
            result = take_1d(result, self.orig.cat.codes)

        # return the result as a Series, which is by definition a copy
        result = Series(result, index=self.index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        result.is_copy = ("modifications to a property of a datetimelike "
                          "object are not supported and are discarded. "
                          "Change values on the original.")

        return result
예제 #11
0
파일: period.py 프로젝트: DGrady/pandas
 def _maybe_convert_timedelta(self, other):
     if isinstance(other, (timedelta, np.timedelta64,
                           offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = other.rule_code
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
         msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
         raise IncompatibleFrequency(msg)
     elif isinstance(other, np.ndarray):
         if is_integer_dtype(other):
             return other
         elif is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 if (nanos % offset_nanos).all() == 0:
                     return nanos // offset_nanos
     # raise when input doesn't have freq
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise IncompatibleFrequency(msg.format(self.freqstr))
예제 #12
0
 def _convert_arr_indexer(self, keyarr):
     # Cast the indexer to uint64 if possible so
     # that the values returned from indexing are
     # also uint64.
     if is_integer_dtype(keyarr):
         return _asarray_tuplesafe(keyarr, dtype=np.uint64)
     return keyarr
예제 #13
0
 def _convert_arr_indexer(self, keyarr):
     # Cast the indexer to uint64 if possible so
     # that the values returned from indexing are
     # also uint64.
     if is_integer_dtype(keyarr):
         return _asarray_tuplesafe(keyarr, dtype=np.uint64)
     return keyarr
예제 #14
0
def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
예제 #15
0
def backfill_2d(values, limit=None, mask=None, dtype=None):

    if dtype is None:
        dtype = values.dtype
    _method = None
    if is_float_dtype(values):
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        _method = _backfill_2d_datetime
    elif is_integer_dtype(values):
        values = _ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = isnull(values)
    mask = mask.view(np.uint8)

    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values
예제 #16
0
def mode(values):
    """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
    # must sort because hash order isn't necessarily defined.
    from pandas.core.series import Series

    if isinstance(values, Series):
        constructor = values._constructor
        values = values.values
    else:
        values = np.asanyarray(values)
        constructor = Series

    dtype = values.dtype
    if is_integer_dtype(values):
        values = _ensure_int64(values)
        result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)

    elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
        dtype = values.dtype
        values = values.view(np.int64)
        result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)

    elif is_categorical_dtype(values):
        result = constructor(values.mode())
    else:
        mask = isnull(values)
        values = _ensure_object(values)
        res = htable.mode_object(values, mask)
        try:
            res = sorted(res)
        except TypeError as e:
            warn("Unable to sort modes: %s" % e)
        result = constructor(res, dtype=dtype)

    return result
예제 #17
0
def _get_data_algo(values, func_map):

    f = None
    if is_float_dtype(values):
        f = func_map['float64']
        values = _ensure_float64(values)

    elif needs_i8_conversion(values):
        f = func_map['int64']
        values = values.view('i8')

    elif is_integer_dtype(values):
        f = func_map['int64']
        values = _ensure_int64(values)
    else:

        values = _ensure_object(values)

        # its cheaper to use a String Hash Table than Object
        if lib.infer_dtype(values) in ['string']:
            try:
                f = func_map['string']
            except KeyError:
                pass

    if f is None:
        f = func_map['generic']

    return f, values
예제 #18
0
파일: array.py 프로젝트: MattRijk/pandas
    def _simple_new(cls, data, sp_index, fill_value):
        if not isinstance(sp_index, SparseIndex):
            # caller must pass SparseIndex
            raise ValueError('sp_index must be a SparseIndex')

        if fill_value is None:
            if sp_index.ngaps > 0:
                # has missing hole
                fill_value = np.nan
            else:
                fill_value = na_value_for_dtype(data.dtype)

        if (is_integer_dtype(data) and is_float(fill_value) and
           sp_index.ngaps > 0):
            # if float fill_value is being included in dense repr,
            # convert values to float
            data = data.astype(float)

        result = data.view(cls)

        if not isinstance(sp_index, SparseIndex):
            # caller must pass SparseIndex
            raise ValueError('sp_index must be a SparseIndex')

        result.sp_index = sp_index
        result._fill_value = fill_value
        return result
예제 #19
0
def backfill_2d(values, limit=None, mask=None, dtype=None):

    if dtype is None:
        dtype = values.dtype
    _method = None
    if is_float_dtype(values):
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        _method = _backfill_2d_datetime
    elif is_integer_dtype(values):
        values = _ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = isnull(values)
    mask = mask.view(np.uint8)

    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values
예제 #20
0
def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
    """Convert a list of objects to a timedelta index object."""

    if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
        arg = np.array(list(arg), dtype='O')

    # these are shortcut-able
    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        value = arg.astype('timedelta64[{0}]'.format(
            unit)).astype('timedelta64[ns]', copy=False)
    else:
        try:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit, errors=errors)
            value = value.astype('timedelta64[ns]', copy=False)
        except ValueError:
            if errors == 'ignore':
                return arg
            else:
                # This else-block accounts for the cases when errors='raise'
                # and errors='coerce'. If errors == 'raise', these errors
                # should be raised. If errors == 'coerce', we shouldn't
                # expect any errors to be raised, since all parsing errors
                # cause coercion to pd.NaT. However, if an error / bug is
                # introduced that causes an Exception to be raised, we would
                # like to surface it.
                raise

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value, unit='ns', name=name)
    return value
예제 #21
0
def mode(values):
    """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
    # must sort because hash order isn't necessarily defined.
    from pandas.core.series import Series

    if isinstance(values, Series):
        constructor = values._constructor
        values = values.values
    else:
        values = np.asanyarray(values)
        constructor = Series

    dtype = values.dtype
    if is_integer_dtype(values):
        values = _ensure_int64(values)
        result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)

    elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
        dtype = values.dtype
        values = values.view(np.int64)
        result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)

    elif is_categorical_dtype(values):
        result = constructor(values.mode())
    else:
        mask = isnull(values)
        values = _ensure_object(values)
        res = htable.mode_object(values, mask)
        try:
            res = sorted(res)
        except TypeError as e:
            warn("Unable to sort modes: %s" % e)
        result = constructor(res, dtype=dtype)

    return result
예제 #22
0
파일: array.py 프로젝트: zebartol/pandas
    def _simple_new(cls, data, sp_index, fill_value):
        if not isinstance(sp_index, SparseIndex):
            # caller must pass SparseIndex
            raise ValueError('sp_index must be a SparseIndex')

        if fill_value is None:
            if sp_index.ngaps > 0:
                # has missing hole
                fill_value = np.nan
            else:
                fill_value = na_value_for_dtype(data.dtype)

        if (is_integer_dtype(data) and is_float(fill_value)
                and sp_index.ngaps > 0):
            # if float fill_value is being included in dense repr,
            # convert values to float
            data = data.astype(float)

        result = data.view(cls)

        if not isinstance(sp_index, SparseIndex):
            # caller must pass SparseIndex
            raise ValueError('sp_index must be a SparseIndex')

        result.sp_index = sp_index
        result._fill_value = fill_value
        return result
예제 #23
0
 def get_expected(s, name):
     result = getattr(Index(s._values), prop)
     if isinstance(result, np.ndarray):
         if is_integer_dtype(result):
             result = result.astype('int64')
     elif not is_list_like(result):
         return result
     return Series(result, index=s.index, name=s.name)
예제 #24
0
    def coerce(values):
        # we allow coercion to if errors allows
        values = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        if is_integer_dtype(values):
            values = values.astype('int64', copy=False)
        return values
예제 #25
0
 def get_expected(s, name):
     result = getattr(Index(s._values), prop)
     if isinstance(result, np.ndarray):
         if is_integer_dtype(result):
             result = result.astype('int64')
     elif not is_list_like(result):
         return result
     return Series(result, index=s.index, name=s.name)
예제 #26
0
    def coerce(values):
        # we allow coercion to if errors allows
        values = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        if is_integer_dtype(values):
            values = values.astype('int64', copy=False)
        return values
예제 #27
0
파일: period.py 프로젝트: DGrady/pandas
 def astype(self, dtype, copy=True):
     dtype = np.dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         return Index(self.values.astype('i8', copy=copy), name=self.name,
                      dtype='i8')
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
예제 #28
0
    def test_setitem_dtype_upcast(self):

        # GH3216
        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        df['c'] = np.nan
        self.assertEqual(df['c'].dtype, np.float64)

        df.loc[0, 'c'] = 'foo'
        expected = DataFrame([{
            "a": 1,
            "c": 'foo'
        }, {
            "a": 3,
            "b": 2,
            "c": np.nan
        }])
        tm.assert_frame_equal(df, expected)

        # GH10280
        df = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
                       index=list('ab'),
                       columns=['foo', 'bar', 'baz'])

        for val in [3.14, 'wxyz']:
            left = df.copy()
            left.loc['a', 'bar'] = val
            right = DataFrame([[0, val, 2], [3, 4, 5]],
                              index=list('ab'),
                              columns=['foo', 'bar', 'baz'])

            tm.assert_frame_equal(left, right)
            self.assertTrue(is_integer_dtype(left['foo']))
            self.assertTrue(is_integer_dtype(left['baz']))

        left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0,
                         index=list('ab'),
                         columns=['foo', 'bar', 'baz'])
        left.loc['a', 'bar'] = 'wxyz'

        right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]],
                          index=list('ab'),
                          columns=['foo', 'bar', 'baz'])

        tm.assert_frame_equal(left, right)
        self.assertTrue(is_float_dtype(left['foo']))
        self.assertTrue(is_float_dtype(left['baz']))
예제 #29
0
 def astype(self, dtype, copy=True):
     dtype = np.dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         return Index(self.values.astype('i8', copy=copy), name=self.name,
                      dtype='i8')
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
예제 #30
0
def _value_counts_arraylike(values, dropna=True):
    is_datetimetz_type = is_datetimetz(values)
    is_period = (isinstance(values, ABCPeriodIndex)
                 or is_period_arraylike(values))

    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        values = values.view(np.int64)
        keys, counts = htable.value_count_int64(values, dropna)

        if dropna:
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz_type:
            if isinstance(orig, ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif is_integer_dtype(dtype):
        values = _ensure_int64(values)
        keys, counts = htable.value_count_int64(values, dropna)
    elif is_float_dtype(dtype):
        values = _ensure_float64(values)
        keys, counts = htable.value_count_float64(values, dropna)
    else:
        values = _ensure_object(values)
        mask = isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
예제 #31
0
def _value_counts_arraylike(values, dropna=True):
    is_datetimetz_type = is_datetimetz(values)
    is_period = (isinstance(values, ABCPeriodIndex) or
                 is_period_arraylike(values))

    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz_type:
            if isinstance(orig, ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif is_integer_dtype(dtype):
        values = _ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    elif is_float_dtype(dtype):
        values = _ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    else:
        values = _ensure_object(values)
        mask = isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
예제 #32
0
파일: period.py 프로젝트: wakamori/pandas
    def _simple_new(cls, values, name=None, freq=None, **kwargs):
        """
        Values can be any type that can be coerced to Periods.
        Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
        """
        if not is_integer_dtype(values):
            values = np.array(values, copy=False)
            if len(values) > 0 and is_float_dtype(values):
                raise TypeError("PeriodIndex can't take floats")
            return cls(values, name=name, freq=freq, **kwargs)

        return cls._from_ordinals(values, name, freq, **kwargs)
예제 #33
0
def fill_zeros(result, x, y, name, fill):
    """
    if this is a reversed op, then flip x,y

    if we have an integer value (or array in y)
    and we have 0's, fill them with the fill,
    return the result

    mask the nan's from x
    """
    if fill is None or is_float_dtype(result):
        return result

    if name.startswith(('r', '__r')):
        x, y = y, x

    is_variable_type = (hasattr(y, 'dtype') or hasattr(y, 'type'))
    is_scalar_type = is_scalar(y)

    if not is_variable_type and not is_scalar_type:
        return result

    if is_scalar_type:
        y = np.array(y)

    if is_integer_dtype(y):

        if (y == 0).any():

            # GH 7325, mask and nans must be broadcastable (also: PR 9308)
            # Raveling and then reshaping makes np.putmask faster
            mask = ((y == 0) & ~np.isnan(result)).ravel()

            shape = result.shape
            result = result.astype('float64', copy=False).ravel()

            np.putmask(result, mask, fill)

            # if we have a fill of inf, then sign it correctly
            # (GH 6178 and PR 9308)
            if np.isinf(fill):
                signs = np.sign(y if name.startswith(('r', '__r')) else x)
                negative_inf_mask = (signs.ravel() < 0) & mask
                np.putmask(result, negative_inf_mask, -fill)

            if "floordiv" in name:  # (PR 9308)
                nan_mask = ((y == 0) & (x == 0)).ravel()
                np.putmask(result, nan_mask, np.nan)

            result = result.reshape(shape)

    return result
예제 #34
0
def fill_zeros(result, x, y, name, fill):
    """
    if this is a reversed op, then flip x,y

    if we have an integer value (or array in y)
    and we have 0's, fill them with the fill,
    return the result

    mask the nan's from x
    """
    if fill is None or is_float_dtype(result):
        return result

    if name.startswith(('r', '__r')):
        x, y = y, x

    is_variable_type = (hasattr(y, 'dtype') or hasattr(y, 'type'))
    is_scalar_type = is_scalar(y)

    if not is_variable_type and not is_scalar_type:
        return result

    if is_scalar_type:
        y = np.array(y)

    if is_integer_dtype(y):

        if (y == 0).any():

            # GH 7325, mask and nans must be broadcastable (also: PR 9308)
            # Raveling and then reshaping makes np.putmask faster
            mask = ((y == 0) & ~np.isnan(result)).ravel()

            shape = result.shape
            result = result.astype('float64', copy=False).ravel()

            np.putmask(result, mask, fill)

            # if we have a fill of inf, then sign it correctly
            # (GH 6178 and PR 9308)
            if np.isinf(fill):
                signs = np.sign(y if name.startswith(('r', '__r')) else x)
                negative_inf_mask = (signs.ravel() < 0) & mask
                np.putmask(result, negative_inf_mask, -fill)

            if "floordiv" in name:  # (PR 9308)
                nan_mask = ((y == 0) & (x == 0)).ravel()
                np.putmask(result, nan_mask, np.nan)

            result = result.reshape(shape)

    return result
예제 #35
0
def _get_prev_label(label):
    dtype = getattr(label, 'dtype', type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = 'datetime64'
    if is_datetime_or_timedelta_dtype(dtype):
        return label - np.timedelta64(1, 'ns')
    elif is_integer_dtype(dtype):
        return label - 1
    elif is_float_dtype(dtype):
        return np.nextafter(label, -np.infty)
    else:
        raise TypeError('cannot determine next label for type %r' %
                        type(label))
예제 #36
0
    def test_setitem_dtype_upcast(self):

        # GH3216
        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        df['c'] = np.nan
        self.assertEqual(df['c'].dtype, np.float64)

        df.loc[0, 'c'] = 'foo'
        expected = DataFrame([{"a": 1, "c": 'foo'},
                              {"a": 3, "b": 2, "c": np.nan}])
        tm.assert_frame_equal(df, expected)

        # GH10280
        df = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
                       index=list('ab'),
                       columns=['foo', 'bar', 'baz'])

        for val in [3.14, 'wxyz']:
            left = df.copy()
            left.loc['a', 'bar'] = val
            right = DataFrame([[0, val, 2], [3, 4, 5]], index=list('ab'),
                              columns=['foo', 'bar', 'baz'])

            tm.assert_frame_equal(left, right)
            self.assertTrue(is_integer_dtype(left['foo']))
            self.assertTrue(is_integer_dtype(left['baz']))

        left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0,
                         index=list('ab'),
                         columns=['foo', 'bar', 'baz'])
        left.loc['a', 'bar'] = 'wxyz'

        right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]], index=list('ab'),
                          columns=['foo', 'bar', 'baz'])

        tm.assert_frame_equal(left, right)
        self.assertTrue(is_float_dtype(left['foo']))
        self.assertTrue(is_float_dtype(left['baz']))
예제 #37
0
파일: period.py 프로젝트: smoofra/pandas
 def astype(self, dtype, copy=True, how='start'):
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         return Index(self.values.astype('i8', copy=copy), name=self.name,
                      dtype='i8')
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
예제 #38
0
파일: array.py 프로젝트: sunghopark1/pandas
def _sparse_array_op(left, right, op, name, series=False):

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    dtype = _maybe_match_dtype(left, right)

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        result = op(left.get_values(), right.get_values())

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
        fill = op(_get_fill(left), _get_fill(right))
    elif left.sp_index.equals(right.sp_index):
        result = op(left.sp_values, right.sp_values)
        index = left.sp_index
        fill = op(_get_fill(left), _get_fill(right))
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
        sparse_op = getattr(splib, opname)

        result, index, fill = sparse_op(left.sp_values, left.sp_index,
                                        left.fill_value, right.sp_values,
                                        right.sp_index, right.fill_value)
    return _wrap_result(name, result, index, fill, dtype=result.dtype)
예제 #39
0
def _sparse_array_op(left, right, op, name, series=False):

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    dtype = _maybe_match_dtype(left, right)

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        result = op(left.get_values(), right.get_values())

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
        fill = op(_get_fill(left), _get_fill(right))
    elif left.sp_index.equals(right.sp_index):
        result = op(left.sp_values, right.sp_values)
        index = left.sp_index
        fill = op(_get_fill(left), _get_fill(right))
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
        sparse_op = getattr(splib, opname)

        result, index, fill = sparse_op(left.sp_values, left.sp_index,
                                        left.fill_value, right.sp_values,
                                        right.sp_index, right.fill_value)
    return _wrap_result(name, result, index, fill, dtype=result.dtype)
예제 #40
0
    def _simple_new(cls,
                    left,
                    right,
                    closed=None,
                    name=None,
                    copy=False,
                    verify_integrity=True):
        result = IntervalMixin.__new__(cls)

        if closed is None:
            closed = 'right'
        left = _ensure_index(left, copy=copy)
        right = _ensure_index(right, copy=copy)

        # coerce dtypes to match if needed
        if is_float_dtype(left) and is_integer_dtype(right):
            right = right.astype(left.dtype)
        if is_float_dtype(right) and is_integer_dtype(left):
            left = left.astype(right.dtype)

        if type(left) != type(right):
            raise ValueError("must not have differing left [{}] "
                             "and right [{}] types".format(
                                 type(left), type(right)))

        if isinstance(left, ABCPeriodIndex):
            raise ValueError("Period dtypes are not supported, "
                             "use a PeriodIndex instead")

        result._left = left
        result._right = right
        result._closed = closed
        result.name = name
        if verify_integrity:
            result._validate()
        result._reset_identity()
        return result
예제 #41
0
    def _simple_new(cls, data, sp_index, fill_value):
        if is_integer_dtype(data) and is_float(fill_value) and sp_index.ngaps > 0:
            # if float fill_value is being included in dense repr,
            # convert values to float
            data = data.astype(float)

        result = data.view(cls)

        if not isinstance(sp_index, SparseIndex):
            # caller must pass SparseIndex
            raise ValueError("sp_index must be a SparseIndex")

        result.sp_index = sp_index
        result.fill_value = fill_value
        return result
예제 #42
0
파일: numeric.py 프로젝트: orivej/pandas
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
예제 #43
0
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
예제 #44
0
def _get_values(values,
                skipna,
                fill_value=None,
                fill_value_typ=None,
                isfinite=False,
                copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
예제 #45
0
 def astype(self, dtype, copy=True, how='start'):
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         if copy:
             return self._int64index.copy()
         else:
             return self._int64index
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
예제 #46
0
 def astype(self, dtype, copy=True, how='start'):
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         if copy:
             return self._int64index.copy()
         else:
             return self._int64index
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
예제 #47
0
def _hashtable_algo(f, dtype, return_dtype=None):
    """
    f(HashTable, type_caster) -> result
    """
    if is_float_dtype(dtype):
        return f(htable.Float64HashTable, _ensure_float64)
    elif is_integer_dtype(dtype):
        return f(htable.Int64HashTable, _ensure_int64)
    elif is_datetime64_dtype(dtype):
        return_dtype = return_dtype or 'M8[ns]'
        return f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
    elif is_timedelta64_dtype(dtype):
        return_dtype = return_dtype or 'm8[ns]'
        return f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
    else:
        return f(htable.PyObjectHashTable, _ensure_object)
예제 #48
0
def _get_data_algo(values, func_map):
    if is_float_dtype(values):
        f = func_map['float64']
        values = _ensure_float64(values)

    elif needs_i8_conversion(values):
        f = func_map['int64']
        values = values.view('i8')

    elif is_integer_dtype(values):
        f = func_map['int64']
        values = _ensure_int64(values)
    else:
        f = func_map['generic']
        values = _ensure_object(values)
    return f, values
예제 #49
0
def _get_data_algo(values, func_map):
    if is_float_dtype(values):
        f = func_map['float64']
        values = _ensure_float64(values)

    elif needs_i8_conversion(values):
        f = func_map['int64']
        values = values.view('i8')

    elif is_integer_dtype(values):
        f = func_map['int64']
        values = _ensure_int64(values)
    else:
        f = func_map['generic']
        values = _ensure_object(values)
    return f, values
예제 #50
0
def _hashtable_algo(f, dtype, return_dtype=None):
    """
    f(HashTable, type_caster) -> result
    """
    if is_float_dtype(dtype):
        return f(htable.Float64HashTable, _ensure_float64)
    elif is_integer_dtype(dtype):
        return f(htable.Int64HashTable, _ensure_int64)
    elif is_datetime64_dtype(dtype):
        return_dtype = return_dtype or 'M8[ns]'
        return f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
    elif is_timedelta64_dtype(dtype):
        return_dtype = return_dtype or 'm8[ns]'
        return f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
    else:
        return f(htable.PyObjectHashTable, _ensure_object)
예제 #51
0
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    =============== =================
    """
    if is_integer_dtype(x):
        return 'integer'
    elif is_bool_dtype(x):
        return 'boolean'
    elif is_numeric_dtype(x):
        return 'number'
    elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or
          is_period_dtype(x)):
        return 'datetime'
    elif is_timedelta64_dtype(x):
        return 'duration'
    elif is_categorical_dtype(x):
        return 'any'
    elif is_string_dtype(x):
        return 'string'
    else:
        return 'any'
예제 #52
0
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    =============== =================
    """
    if is_integer_dtype(x):
        return 'integer'
    elif is_bool_dtype(x):
        return 'boolean'
    elif is_numeric_dtype(x):
        return 'number'
    elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x)
          or is_period_dtype(x)):
        return 'datetime'
    elif is_timedelta64_dtype(x):
        return 'duration'
    elif is_categorical_dtype(x):
        return 'any'
    elif is_string_dtype(x):
        return 'string'
    else:
        return 'any'
예제 #53
0
def duplicated(values, keep='first'):
    """
    Return boolean ndarray denoting duplicate values

    .. versionadded:: 0.19.0

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    duplicated : ndarray
    """

    dtype = values.dtype

    # no need to revert to original type
    if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype):
        if isinstance(values, (ABCSeries, ABCIndex)):
            values = values.values.view(np.int64)
        else:
            values = values.view(np.int64)
    elif is_period_arraylike(values):
        from pandas.tseries.period import PeriodIndex
        values = PeriodIndex(values).asi8
    elif is_categorical_dtype(dtype):
        values = values.values.codes
    elif isinstance(values, (ABCSeries, ABCIndex)):
        values = values.values

    if is_integer_dtype(dtype):
        values = _ensure_int64(values)
        duplicated = htable.duplicated_int64(values, keep=keep)
    elif is_float_dtype(dtype):
        values = _ensure_float64(values)
        duplicated = htable.duplicated_float64(values, keep=keep)
    else:
        values = _ensure_object(values)
        duplicated = htable.duplicated_object(values, keep=keep)

    return duplicated
예제 #54
0
def duplicated(values, keep='first'):
    """
    Return boolean ndarray denoting duplicate values

    .. versionadded:: 0.19.0

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    duplicated : ndarray
    """

    dtype = values.dtype

    # no need to revert to original type
    if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype):
        if isinstance(values, (ABCSeries, ABCIndex)):
            values = values.values.view(np.int64)
        else:
            values = values.view(np.int64)
    elif is_period_arraylike(values):
        from pandas.tseries.period import PeriodIndex
        values = PeriodIndex(values).asi8
    elif is_categorical_dtype(dtype):
        values = values.values.codes
    elif isinstance(values, (ABCSeries, ABCIndex)):
        values = values.values

    if is_integer_dtype(dtype):
        values = _ensure_int64(values)
        duplicated = htable.duplicated_int64(values, keep=keep)
    elif is_float_dtype(dtype):
        values = _ensure_float64(values)
        duplicated = htable.duplicated_float64(values, keep=keep)
    else:
        values = _ensure_object(values)
        duplicated = htable.duplicated_object(values, keep=keep)

    return duplicated
예제 #55
0
    def astype(self, dtype, copy=True):
        dtype = np.dtype(dtype)

        if is_object_dtype(dtype):
            return self.asobject
        elif is_timedelta64_ns_dtype(dtype):
            if copy is True:
                return self.copy()
            return self
        elif is_timedelta64_dtype(dtype):
            # return an index (essentially this is division)
            result = self.values.astype(dtype, copy=copy)
            if self.hasnans:
                return Index(self._maybe_mask_results(result, convert="float64"), name=self.name)
            return Index(result.astype("i8"), name=self.name)
        elif is_integer_dtype(dtype):
            return Index(self.values.astype("i8", copy=copy), dtype="i8", name=self.name)
        raise ValueError("Cannot cast TimedeltaIndex to dtype %s" % dtype)
예제 #56
0
    def _simple_new(cls, values, name=None, freq=None, **kwargs):

        if not is_integer_dtype(values):
            values = np.array(values, copy=False)
            if (len(values) > 0 and is_float_dtype(values)):
                raise TypeError("PeriodIndex can't take floats")
            else:
                return PeriodIndex(values, name=name, freq=freq, **kwargs)

        values = np.array(values, dtype='int64', copy=False)

        result = object.__new__(cls)
        result._data = values
        result.name = name
        if freq is None:
            raise ValueError('freq is not specified')
        result.freq = Period._maybe_convert_freq(freq)
        result._reset_identity()
        return result
예제 #57
0
파일: period.py 프로젝트: DGrady/pandas
    def _simple_new(cls, values, name=None, freq=None, **kwargs):

        if not is_integer_dtype(values):
            values = np.array(values, copy=False)
            if (len(values) > 0 and is_float_dtype(values)):
                raise TypeError("PeriodIndex can't take floats")
            else:
                return PeriodIndex(values, name=name, freq=freq, **kwargs)

        values = np.array(values, dtype='int64', copy=False)

        result = object.__new__(cls)
        result._data = values
        result.name = name
        if freq is None:
            raise ValueError('freq is not specified')
        result.freq = Period._maybe_convert_freq(freq)
        result._reset_identity()
        return result
예제 #58
0
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max