Example #1
def nankurt(values, axis=None, skipna=True):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted4 = adjusted2**2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    adj = 3 * (count - 1)**2 / ((count - 2) * (count - 3))
    numer = count * (count + 1) * (count - 1) * m4
    denom = (count - 2) * (count - 3) * m2**2

    # floating point error
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
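For reference, the adjusted Fisher-Pearson G2 formula used above can be reproduced on a plain 1-D NumPy array. The sketch below is a standalone illustration (the helper name g2_excess_kurtosis is ours, not pandas'), assuming at least four non-NaN observations and non-zero variance.

import numpy as np

def g2_excess_kurtosis(x):
    # Standalone sketch of the G2 formula from the example above:
    # drop NaNs (the skipna=True path), build the second and fourth
    # central-moment sums, then apply the bias adjustment.
    x = np.asarray(x, dtype=np.float64)
    x = x[~np.isnan(x)]
    n = x.size
    d = x - x.mean()
    m2 = (d ** 2).sum()
    m4 = (d ** 4).sum()
    adj = 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
    numer = n * (n + 1) * (n - 1) * m4
    denom = (n - 2) * (n - 3) * m2 ** 2
    return numer / denom - adj

# A large normal sample should give an excess kurtosis close to 0.
print(g2_excess_kurtosis(np.random.randn(100_000)))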
Example #2
def nankurt(values, axis=None, skipna=True):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted4 = adjusted2 ** 2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
    numer = count * (count + 1) * (count - 1) * m4
    denom = (count - 2) * (count - 3) * m2**2

    # floating point error
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
Example #3
def nanskew(values, axis=None, skipna=True):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    """

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
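The same G1 formula can be checked outside pandas. A minimal sketch (the helper name g1_skewness is illustrative), assuming a 1-D sample with at least three non-NaN values and non-zero variance:

import numpy as np

def g1_skewness(x):
    # Standalone sketch of the G1 formula from the example above.
    x = np.asarray(x, dtype=np.float64)
    x = x[~np.isnan(x)]
    n = x.size
    d = x - x.mean()
    m2 = (d ** 2).sum()
    m3 = (d ** 3).sum()
    return (n * (n - 1) ** 0.5 / (n - 2)) * (m3 / m2 ** 1.5)

# A symmetric sample should give a skewness close to 0.
print(g1_skewness(np.random.randn(100_000)))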
Example #4
def nanskew(values, axis=None, skipna=True):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    """

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    result = (count * (count - 1)**0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
Example #5
    def _prep_values(self, values=None, kill_inf=True, how=None):

        if values is None:
            values = getattr(self._selected_obj, 'values', self._selected_obj)

        # GH #12373 : rolling functions error on float32 data
        # make sure the data is coerced to float64
        if com.is_float_dtype(values.dtype):
            values = com._ensure_float64(values)
        elif com.is_integer_dtype(values.dtype):
            values = com._ensure_float64(values)
        elif com.needs_i8_conversion(values.dtype):
            raise NotImplementedError("ops for {action} for this "
                                      "dtype {dtype} are not "
                                      "implemented".format(
                                          action=self._window_type,
                                          dtype=values.dtype))
        else:
            try:
                values = com._ensure_float64(values)
            except (ValueError, TypeError):
                raise TypeError("cannot handle this type -> {0}"
                                "".format(values.dtype))

        if kill_inf:
            values = values.copy()
            values[np.isinf(values)] = np.NaN

        return values
Example #6
 def test_delevel_infer_dtype(self):
     tuples = [tuple for tuple in cart_product(["foo", "bar"], [10, 20], [1.0, 1.1])]
     index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
     df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
     deleveled = df.reset_index()
     self.assert_(com.is_integer_dtype(deleveled["prm1"]))
     self.assert_(com.is_float_dtype(deleveled["prm2"]))
Example #7
def _get_data_algo(values, func_map):
    mask = None
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)

    elif com.needs_i8_conversion(values):

        # if we have NaT, punt to object dtype
        mask = com.isnull(values)
        if mask.ravel().any():
            f = func_map['generic']
            values = com._ensure_object(values)
            values[mask] = np.nan
        else:
            f = func_map['int64']
            values = values.view('i8')

    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
Example #8
def na_value_for_dtype(dtype):
    """
    Return a dtype-compatible NA value.

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    NA value compatible with the given dtype
    """

    from pandas.core import common as com
    from pandas import NaT
    dtype = pandas_dtype(dtype)

    if (com.is_datetime64_dtype(dtype) or
        com.is_datetime64tz_dtype(dtype) or
        com.is_timedelta64_dtype(dtype)):
        return NaT
    elif com.is_float_dtype(dtype):
        return np.nan
    elif com.is_integer_dtype(dtype):
        return 0
    elif com.is_bool_dtype(dtype):
        return False
    return np.nan
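The dtype-to-NA mapping above can be sketched without any pandas internals. The helper below (na_value_for_numpy_dtype is an illustrative name) covers only plain NumPy dtypes and stands in NumPy's own NaT values for the pandas NaT return.

import numpy as np

def na_value_for_numpy_dtype(dtype):
    # Rough standalone analogue of the mapping above for NumPy dtypes only.
    dtype = np.dtype(dtype)
    if dtype.kind == 'M':        # datetime64 -> NaT (pandas returns pd.NaT)
        return np.datetime64('NaT')
    if dtype.kind == 'm':        # timedelta64 -> NaT
        return np.timedelta64('NaT')
    if dtype.kind == 'f':        # float -> NaN
        return np.nan
    if dtype.kind in 'iu':       # signed/unsigned integer -> 0
        return 0
    if dtype.kind == 'b':        # boolean -> False
        return False
    return np.nan                # everything else falls back to NaN

print(na_value_for_numpy_dtype('float64'), na_value_for_numpy_dtype('int32'))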
Example #9
File: sql.py Project: yazici/pandas
    def _sqlalchemy_type(self, arr_or_dtype):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Interval)

        if arr_or_dtype is date:
            return Date
        if com.is_datetime64_dtype(arr_or_dtype):
            try:
                tz = arr_or_dtype.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(arr_or_dtype):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(arr_or_dtype):
            return Float
        elif com.is_integer_dtype(arr_or_dtype):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(arr_or_dtype):
            return Boolean
        return Text
Example #10
def _convert_DataFrame(rdf):
    columns = list(rdf.colnames)
    rows = np.array(rdf.rownames)

    data = {}
    for i, col in enumerate(columns):
        vec = rdf.rx2(i + 1)
        values = _convert_vector(vec)

        if isinstance(vec, robj.FactorVector):
            levels = np.asarray(vec.levels)
            if com.is_float_dtype(values):
                mask = np.isnan(values)
                notmask = ~mask
                result = np.empty(len(values), dtype=object)
                result[mask] = np.nan

                locs = (values[notmask] - 1).astype(np.int_)
                result[notmask] = levels.take(locs)
                values = result
            else:
                values = np.asarray(vec.levels).take(values - 1)

        data[col] = values

    return pd.DataFrame(data, index=_check_int(rows), columns=columns)
Example #11
def nanskew(values, axis=None, skipna=True):

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')

    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B

    # floating point error
    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)

    result = ((np.sqrt((count ** 2 - count)) * C) /
              ((count - 2) * np.sqrt(B) ** 3))

    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 3:
            return np.nan
        return result
Example #12
def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values)
            or is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
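A quick illustration of the check used above: for numeric dtypes, ~np.isfinite flags both NaN and infinities in a single pass.

import numpy as np

x = np.array([1.0, np.nan, np.inf, -np.inf, 2.5])
print(~np.isfinite(x))   # [False  True  True  True False]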
Example #13
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    if com.is_float_dtype(values.dtype):
        fmt_klass = FloatArrayFormatter
    elif com.is_integer_dtype(values.dtype):
        fmt_klass = IntArrayFormatter
    elif com.is_datetime64_dtype(values.dtype):
        fmt_klass = Datetime64Formatter
    else:
        fmt_klass = GenericArrayFormatter

    if space is None:
        space = get_option("print.column_space")

    if float_format is None:
        float_format = get_option("print.float_format")

    if digits is None:
        digits = get_option("print.precision")

    fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
                        float_format=float_format,
                        formatter=formatter, space=space,
                        justify=justify)

    return fmt_obj.get_result()
Example #14
File: sql.py Project: Jemash/pandas
    def _sqlalchemy_type(self, col):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
            DateTime, Date, Time, Interval)

        if com.is_datetime64_dtype(col):
            try:
                tz = col.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Example #15
def _convert_DataFrame(rdf):
    columns = list(rdf.colnames)
    rows = np.array(rdf.rownames)

    data = {}
    for i, col in enumerate(columns):
        vec = rdf.rx2(i + 1)
        values = _convert_vector(vec)

        if isinstance(vec, robj.FactorVector):
            levels = np.asarray(vec.levels)
            if com.is_float_dtype(values):
                mask = np.isnan(values)
                notmask = ~mask
                result = np.empty(len(values), dtype=object)
                result[mask] = np.nan

                locs = (values[notmask] - 1).astype(np.int_)
                result[notmask] = levels.take(locs)
                values = result
            else:
                values = np.asarray(vec.levels).take(values - 1)

        data[col] = values

    return pd.DataFrame(data, index=_check_int(rows), columns=columns)
Example #16
    def _sqlalchemy_type(self, col):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Time)

        if com.is_datetime64_dtype(col):
            try:
                tz = col.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Example #17
def backfill_2d(values, limit=None, mask=None, dtype=None):

    if dtype is None:
        dtype = values.dtype
    _method = None
    if com.is_float_dtype(values):
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _backfill_2d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    mask = mask.view(np.uint8)

    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values
Example #18
def _get_data_algo(values, func_map):
    mask = None
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)

    elif com.needs_i8_conversion(values):

        # if we have NaT, punt to object dtype
        mask = com.isnull(values)
        if mask.ravel().any():
            f = func_map['generic']
            values = com._ensure_object(values)
            values[mask] = np.nan
        else:
            f = func_map['int64']
            values = values.view('i8')

    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
Example #19
def _isfinite(values):
    if _is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
Example #20
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    if com.is_float_dtype(values.dtype):
        fmt_klass = FloatArrayFormatter
    elif com.is_integer_dtype(values.dtype):
        fmt_klass = IntArrayFormatter
    elif com.is_datetime64_dtype(values.dtype):
        fmt_klass = Datetime64Formatter
    else:
        fmt_klass = GenericArrayFormatter

    if space is None:
        space = print_config.column_space

    if float_format is None:
        float_format = print_config.float_format

    if digits is None:
        digits = print_config.precision

    fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
                        float_format=float_format,
                        formatter=formatter, space=space,
                        justify=justify)

    return fmt_obj.get_result()
Example #21
    def _sqlalchemy_type(self, arr_or_dtype):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
            DateTime, Date, Interval)

        if arr_or_dtype is date:
            return Date
        if com.is_datetime64_dtype(arr_or_dtype):
            try:
                tz = arr_or_dtype.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(arr_or_dtype):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(arr_or_dtype):
            return Float
        elif com.is_integer_dtype(arr_or_dtype):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(arr_or_dtype):
            return Boolean
        return Text
Example #22
def nankurt(values, axis=None, skipna=True):

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')

    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B
    D = (values ** 4).sum(axis) / count - A ** 4 - 6 * B * A * A - 4 * C * A

    B = _zero_out_fperr(B)
    D = _zero_out_fperr(D)

    if not isinstance(B, np.ndarray):
        # if B is a scalar, check these corner cases first before doing division
        if count < 4:
            return np.nan
        if B == 0:
            return 0

    result = (((count * count - 1.) * D / (B * B) - 3 * ((count - 1.) ** 2)) /
              ((count - 2.) * (count - 3.)))

    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 4] = np.nan

    return result
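This older variant builds the central moments from raw moments (A is the mean, B the biased variance, C and D the third and fourth central moments). A quick numerical check of those identities, assuming a 1-D float sample:

import numpy as np

x = np.random.randn(100_000)
n = x.size
A = x.sum() / n
B = (x ** 2).sum() / n - A ** 2                                # second central moment
C = (x ** 3).sum() / n - A ** 3 - 3 * A * B                    # third central moment
D = (x ** 4).sum() / n - A ** 4 - 6 * B * A * A - 4 * C * A    # fourth central moment

d = x - x.mean()
print(np.allclose([B, C, D], [(d ** 2).mean(), (d ** 3).mean(), (d ** 4).mean()]))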
Example #23
def nanskew(values, axis=None, skipna=True):

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    typ = values.dtype.type
    A = values.sum(axis) / count
    B = (values**2).sum(axis) / count - A**typ(2)
    C = (values**3).sum(axis) / count - A**typ(3) - typ(3) * A * B

    # floating point error
    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)

    result = ((np.sqrt(count * count - count) * C) /
              ((count - typ(2)) * np.sqrt(B)**typ(3)))

    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 3:
            return np.nan
        return result
Example #24
    def convert(values, unit, axis):
        def try_parse(values):
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif (com.is_integer(values) or com.is_float(values)):
            return values
        elif isinstance(values, compat.string_types):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                values = com._asarray_tuplesafe(values)

            if com.is_integer_dtype(values) or com.is_float_dtype(values):
                return values

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                pass

        return values
Example #25
    def convert(values, unit, axis):
        from pandas.tseries.index import DatetimeIndex
        def try_parse(values):
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif (com.is_integer(values) or com.is_float(values)):
            return values
        elif isinstance(values, basestring):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                values = com._asarray_tuplesafe(values)

            if com.is_integer_dtype(values) or com.is_float_dtype(values):
                return values

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                pass

        return values
Example #26
File: nanops.py Project: DT021/wau
def nansum(values, axis=None, skipna=True):
    values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
    dtype_sum = dtype_max
    if is_float_dtype(dtype):
        dtype_sum = dtype
    the_sum = values.sum(axis, dtype=dtype_sum)
    the_sum = _maybe_null_out(the_sum, axis, mask)

    return _wrap_results(the_sum, dtype)
Example #27
def _pad(values):
    if com.is_float_dtype(values):
        _method = lib.pad_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = lib.pad_2d_inplace_object
    else:
        raise ValueError('Invalid dtype for padding')

    _method(values, com.isnull(values).view(np.uint8))
Example #28
File: nanops.py Project: DT021/wau
def nansem(values, axis=None, skipna=True, ddof=1):
    var = nanvar(values, axis, skipna, ddof=ddof)

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype)

    return np.sqrt(var) / np.sqrt(count)
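The standard error of the mean computed above is simply sqrt(variance) / sqrt(count), with the count excluding missing values. A standalone NumPy sketch:

import numpy as np

x = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
valid = x[~np.isnan(x)]                                # skipna: drop missing values
sem = np.sqrt(valid.var(ddof=1)) / np.sqrt(valid.size)
print(sem)                                             # same as valid.std(ddof=1) / 2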
Example #29
def nansem(values, axis=None, skipna=True, ddof=1):
    var = nanvar(values, axis, skipna, ddof=ddof)

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    count, _ = _get_counts_nanvar(mask, axis, ddof)

    return np.sqrt(var)/np.sqrt(count)
Example #30
def _backfill(values):
    if com.is_float_dtype(values):
        _method = lib.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = lib.backfill_2d_inplace_object
    else:  # pragma: no cover
        raise ValueError('Invalid dtype for padding')

    _method(values, com.isnull(values).view(np.uint8))
Example #31
def _backfill(values):
    if com.is_float_dtype(values):
        _method = lib.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = lib.backfill_2d_inplace_object
    else: # pragma: no cover
        raise ValueError('Invalid dtype for padding')

    _method(values, com.isnull(values).view(np.uint8))
Example #32
def _hashtable_algo(f, dtype):
    """
    f(HashTable, type_caster) -> result
    """
    if com.is_float_dtype(dtype):
        return f(htable.Float64HashTable, com._ensure_float64)
    elif com.is_integer_dtype(dtype):
        return f(htable.Int64HashTable, com._ensure_int64)
    else:
        return f(htable.PyObjectHashTable, com._ensure_object)
Example #33
 def test_delevel_infer_dtype(self):
     tuples = [tuple for tuple in cart_product(['foo', 'bar'],
                                               [10, 20], [1.0, 1.1])]
     index = MultiIndex.from_tuples(tuples,
                                    names=['prm0', 'prm1', 'prm2'])
     df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'],
                    index=index)
     deleveled = df.delevel()
     self.assert_(com.is_integer_dtype(deleveled['prm1']))
     self.assert_(com.is_float_dtype(deleveled['prm2']))
Example #34
def _hashtable_algo(f, dtype):
    """
    f(HashTable, type_caster) -> result
    """
    if com.is_float_dtype(dtype):
        return f(htable.Float64HashTable, com._ensure_float64)
    elif com.is_integer_dtype(dtype):
        return f(htable.Int64HashTable, com._ensure_int64)
    else:
        return f(htable.PyObjectHashTable, com._ensure_object)
Example #35
 def test_delevel_infer_dtype(self):
     tuples = [tuple for tuple in cart_product(['foo', 'bar'],
                                               [10, 20], [1.0, 1.1])]
     index = MultiIndex.from_tuples(tuples,
                                    names=['prm0', 'prm1', 'prm2'])
     df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'],
                    index=index)
     deleveled = df.reset_index()
     self.assert_(com.is_integer_dtype(deleveled['prm1']))
     self.assert_(com.is_float_dtype(deleveled['prm2']))
Example #36
def count(values, uniques=None):
    if uniques is not None:
        raise NotImplementedError
    else:
        if com.is_float_dtype(values):
            return _count_generic(values, lib.Float64HashTable, _ensure_float64)
        elif com.is_integer_dtype(values):
            return _count_generic(values, lib.Int64HashTable, _ensure_int64)
        else:
            return _count_generic(values, lib.PyObjectHashTable, _ensure_object)
Example #37
 def astype(self, dtype):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype) or is_integer_dtype(dtype):
         values = self._values.astype(dtype)
     elif is_object_dtype(dtype):
         values = self._values
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
Example #38
def _get_hash_table_and_cast(values):
    if com.is_float_dtype(values):
        klass = lib.Float64HashTable
        values = com._ensure_float64(values)
    elif com.is_integer_dtype(values):
        klass = lib.Int64HashTable
        values = com._ensure_int64(values)
    else:
        klass = lib.PyObjectHashTable
        values = com._ensure_object(values)
    return klass, values
Example #39
def _get_hash_table_and_cast(values):
    if com.is_float_dtype(values):
        klass = lib.Float64HashTable
        values = com._ensure_float64(values)
    elif com.is_integer_dtype(values):
        klass = lib.Int64HashTable
        values = com._ensure_int64(values)
    else:
        klass = lib.PyObjectHashTable
        values = com._ensure_object(values)
    return klass, values
Example #40
def _value_counts_arraylike(values, dropna=True):
    is_datetimetz = com.is_datetimetz(values)
    is_period = (isinstance(values, gt.ABCPeriodIndex)
                 or com.is_period_arraylike(values))

    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz:
            if isinstance(orig, gt.ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif com.is_integer_dtype(dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    elif com.is_float_dtype(dtype):
        values = com._ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    else:
        values = com._ensure_object(values)
        mask = com.isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
Example #41
def _value_counts_arraylike(values, dropna=True):
    is_datetimetz = com.is_datetimetz(values)
    is_period = (isinstance(values, gt.ABCPeriodIndex) or
                 com.is_period_arraylike(values))

    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz:
            if isinstance(orig, gt.ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif com.is_integer_dtype(dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    elif com.is_float_dtype(dtype):
        values = com._ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    else:
        values = com._ensure_object(values)
        mask = com.isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
Example #42
def _get_data_algo(values, func_map):
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)
    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
Example #43
 def astype(self, dtype):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype) or is_integer_dtype(dtype):
         values = self._values.astype(dtype)
     elif is_object_dtype(dtype):
         values = self._values
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
Example #44
def _get_data_algo(values, func_map):
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)
    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
Example #45
def fill_zeros(result, x, y, name, fill):
    """
    if this is a reversed op, then flip x,y

    if we have an integer value (or array in y)
    and we have 0's, fill them with the fill,
    return the result

    mask the nan's from x
    """
    if fill is None or com.is_float_dtype(result):
        return result

    if name.startswith(('r', '__r')):
        x, y = y, x

    is_typed_variable = (hasattr(y, 'dtype') or hasattr(y, 'type'))
    is_scalar = lib.isscalar(y)

    if not is_typed_variable and not is_scalar:
        return result

    if is_scalar:
        y = np.array(y)

    if com.is_integer_dtype(y):

        if (y == 0).any():

            # GH 7325, mask and nans must be broadcastable (also: PR 9308)
            # Raveling and then reshaping makes np.putmask faster
            mask = ((y == 0) & ~np.isnan(result)).ravel()

            shape = result.shape
            result = result.astype('float64', copy=False).ravel()

            np.putmask(result, mask, fill)

            # if we have a fill of inf, then sign it correctly
            # (GH 6178 and PR 9308)
            if np.isinf(fill):
                signs = np.sign(y if name.startswith(('r', '__r')) else x)
                negative_inf_mask = (signs.ravel() < 0) & mask
                np.putmask(result, negative_inf_mask, -fill)

            if "floordiv" in name:  # (PR 9308)
                nan_mask = ((y == 0) & (x == 0)).ravel()
                np.putmask(result, nan_mask, np.nan)

            result = result.reshape(shape)

    return result
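The reason this helper exists: NumPy integer floor division by zero yields 0 (plus a warning) rather than inf, so pandas patches those positions afterwards. A simplified standalone sketch of that patching step (ignoring the reversed-op and 0 // 0 handling):

import numpy as np

x = np.array([1, 2, 3])
y = np.array([2, 0, 0])
with np.errstate(divide='ignore'):
    result = (x // y).astype('float64')   # zero wherever y == 0
mask = (y == 0) & ~np.isnan(result)
np.putmask(result, mask, np.inf)          # fill the zero-divisor slots
print(result)                             # [0. inf inf]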
Example #46
def count(values, uniques=None):
    if uniques is not None:
        raise NotImplementedError
    else:
        if com.is_float_dtype(values):
            return _count_generic(values, lib.Float64HashTable,
                                  com._ensure_float64)
        elif com.is_integer_dtype(values):
            return _count_generic(values, lib.Int64HashTable,
                                  com._ensure_int64)
        else:
            return _count_generic(values, lib.PyObjectHashTable,
                                  com._ensure_object)
Example #47
def _get_data_algo(values, func_map):
    if com.is_float_dtype(values):
        f = func_map["float64"]
        values = com._ensure_float64(values)
    elif com.is_datetime64_dtype(values):
        f = func_map["int64"]
        values = values.view("i8")
    elif com.is_integer_dtype(values):
        f = func_map["int64"]
        values = com._ensure_int64(values)
    else:
        f = func_map["generic"]
        values = com._ensure_object(values)
    return f, values
Example #48
def nanvar(values, axis=None, skipna=True, ddof=1):

    dtype = values.dtype
    mask = isnull(values)
    if is_any_int_dtype(values):
        values = values.astype('f8')
        values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(mask, axis, ddof)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)


    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)
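The two-pass approach referenced above (xref GH10242) computes the mean first and then sums squared deviations, which avoids the catastrophic cancellation of the single-pass E[x^2] - E[x]^2 formula. A standalone sketch:

import numpy as np

x = np.array([1e9 + 1.0, 1e9 + 2.0, 1e9 + 3.0])
n = x.size
avg = x.sum(dtype=np.float64) / n
var_two_pass = ((x - avg) ** 2).sum(dtype=np.float64) / (n - 1)          # ddof=1

var_one_pass = (x ** 2).mean() * n / (n - 1) - avg ** 2 * n / (n - 1)    # cancellation-prone
print(var_two_pass, var_one_pass)   # 1.0 vs a value distorted by rounding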
Example #49
    def test_set_value(self):
        for item in self.panel.items:
            for mjr in self.panel.major_axis[::2]:
                for mnr in self.panel.minor_axis:
                    self.panel.set_value(item, mjr, mnr, 1.0)
                    assert_almost_equal(self.panel[item][mnr][mjr], 1.0)

        # resize
        res = self.panel.set_value("ItemE", "foo", "bar", 1.5)
        self.assert_(isinstance(res, Panel))
        self.assert_(res is not self.panel)
        self.assertEqual(res.get_value("ItemE", "foo", "bar"), 1.5)

        res3 = self.panel.set_value("ItemE", "foobar", "baz", 5)
        self.assert_(com.is_float_dtype(res3["ItemE"].values))
Example #50
def nanvar(values, axis=None, skipna=True, ddof=1):

    dtype = values.dtype
    mask = isnull(values)
    if is_any_int_dtype(values):
        values = values.astype('f8')
        values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(mask, axis, ddof)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values)**2)
    np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)
Example #51
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
Example #52
    def test_set_value(self):
        for item in self.panel.items:
            for mjr in self.panel.major_axis[::2]:
                for mnr in self.panel.minor_axis:
                    self.panel.set_value(item, mjr, mnr, 1.)
                    assert_almost_equal(self.panel[item][mnr][mjr], 1.)

        # resize
        res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5)
        self.assert_(isinstance(res, Panel))
        self.assert_(res is not self.panel)
        self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5)

        res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5)
        self.assert_(com.is_float_dtype(res3['ItemE'].values))
Example #53
def _get_values(values,
                skipna,
                fill_value=None,
                fill_value_typ=None,
                isfinite=False,
                copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
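The central step of this helper, copying the values and masking the missing entries with a fill value, can be shown in isolation (assuming a float array whose dtype can hold the fill):

import numpy as np

values = np.array([1.0, np.nan, 3.0])
mask = np.isnan(values)          # stands in for isnull(values)
filled = values.copy()           # copy=True path
np.putmask(filled, mask, 0.0)    # skipna path: replace missing values with the fill
print(filled, mask)              # [1. 0. 3.] [False  True False]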
Example #54
def _get_data_algo(values, func_map):
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)

    elif com.needs_i8_conversion(values):
        f = func_map['int64']
        values = values.view('i8')

    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
Example #55
def _hashtable_algo(f, dtype, return_dtype=None):
    """
    f(HashTable, type_caster) -> result
    """
    if com.is_float_dtype(dtype):
        return f(htable.Float64HashTable, com._ensure_float64)
    elif com.is_integer_dtype(dtype):
        return f(htable.Int64HashTable, com._ensure_int64)
    elif com.is_datetime64_dtype(dtype):
        return_dtype = return_dtype or 'M8[ns]'
        return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype)
    elif com.is_timedelta64_dtype(dtype):
        return_dtype = return_dtype or 'm8[ns]'
        return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype)
    else:
        return f(htable.PyObjectHashTable, com._ensure_object)
Example #56
    def test_set_value(self):
        for label in self.panel4d.labels:
            for item in self.panel4d.items:
                for mjr in self.panel4d.major_axis[::2]:
                    for mnr in self.panel4d.minor_axis:
                        self.panel4d.set_value(label, item, mjr, mnr, 1.)
                        assert_almost_equal(
                            self.panel4d[label][item][mnr][mjr], 1.)

        # resize
        res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5)
        tm.assertIsInstance(res, Panel4D)
        self.assertIsNot(res, self.panel4d)
        self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5)

        res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5)
        self.assertTrue(com.is_float_dtype(res3['l4'].values))