Example #1
    def _getitem_axis(self, key, axis=0):
        labels = self.obj._get_axis(axis)
        if isinstance(key, slice):
            return self._get_slice_axis(key, axis=axis)
        elif _is_list_like(key) and not (isinstance(key, tuple) and
                                         isinstance(labels, MultiIndex)):

            if hasattr(key, 'ndim') and key.ndim > 1:
                raise ValueError('Cannot index with multidimensional key')

            return self._getitem_iterable(key, axis=axis)
        elif axis == 0:
            is_int_index = _is_integer_index(labels)

            idx = key
            if com.is_integer(key):
                if isinstance(labels, MultiIndex):
                    try:
                        return self._get_label(key, axis=0)
                    except (KeyError, TypeError):
                        if _is_integer_index(self.obj.index.levels[0]):
                            raise

                if not is_int_index:
                    idx = labels[key]

            return self._get_label(idx, axis=0)
        else:
            labels = self.obj._get_axis(axis)
            lab = key
            if com.is_integer(key) and not _is_integer_index(labels):
                lab = labels[key]
            return self._get_label(lab, axis=axis)
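
Every snippet on this page leans on pandas' internal com.is_integer helper (pandas.core.common.is_integer). A minimal, purely illustrative stand-in, assuming it only needs to recognise scalar Python and NumPy integers:

import numpy as np

def is_integer_like(obj):
    # Hypothetical approximation of pandas.core.common.is_integer:
    # True for Python ints and NumPy integer scalars, False otherwise.
    return isinstance(obj, (int, np.integer)) and not isinstance(obj, bool)

print(is_integer_like(3))            # True
print(is_integer_like(np.int64(3)))  # True
print(is_integer_like(3.0))          # False
print(is_integer_like('3'))          # False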
Example #2
    def _getitem_axis(self, key, axis=0):
        if isinstance(key, slice):
            return self._get_slice_axis(key, axis=axis)
        elif _is_list_like(key):
            return self._getitem_iterable(key, axis=axis)
        elif axis == 0:
            labels = self.obj._get_axis(0)
            is_int_index = _is_integer_index(labels)

            idx = key
            if com.is_integer(key):
                if isinstance(labels, MultiIndex):
                    try:
                        return self._get_label(key, axis=0)
                    except (KeyError, TypeError):
                        if _is_integer_index(self.obj.index.levels[0]):
                            raise

                if not is_int_index:
                    idx = labels[key]

            return self._get_label(idx, axis=0)
        else:
            labels = self.obj._get_axis(axis)
            lab = key
            if com.is_integer(key) and not _is_integer_index(labels):
                lab = labels[key]
            return self._get_label(lab, axis=axis)
Example #3
def get_freq_code(freqstr):
    """

    Parameters
    ----------

    Returns
    -------
    """
    if isinstance(freqstr, DateOffset):
        freqstr = (get_offset_name(freqstr), freqstr.n)

    if isinstance(freqstr, tuple):
        if (com.is_integer(freqstr[0]) and
            com.is_integer(freqstr[1])):
            #e.g., freqstr = (2000, 1)
            return freqstr
        else:
            #e.g., freqstr = ('T', 5)
            try:
                code = _period_str_to_code(freqstr[0])
                stride = freqstr[1]
            except:
                code = _period_str_to_code(freqstr[1])
                stride = freqstr[0]
            return code, stride

    if com.is_integer(freqstr):
        return (freqstr, 1)

    base, stride = _base_and_stride(freqstr)
    code = _period_str_to_code(base)

    return code, stride
Example #4
 def _get_formatter(self, i):
     if isinstance(self.formatters, (list, tuple)):
         if com.is_integer(i):
             return self.formatters[i]
         else:
             return None
     else:
         if com.is_integer(i) and i not in self.columns:
             i = self.columns[i]
         return self.formatters.get(i, None)
Example #5
def get_freq_code(freqstr):
    """
    Return freq str or tuple to freq code and stride (mult)

    Parameters
    ----------
    freqstr : str or tuple

    Returns
    -------
    return : tuple of base frequency code and stride (mult)

    Example
    -------
    >>> get_freq_code('3D')
    (6000, 3)

    >>> get_freq_code('D')
    (6000, 1)

    >>> get_freq_code(('D', 3))
    (6000, 3)
    """
    if isinstance(freqstr, DateOffset):
        freqstr = (freqstr.rule_code, freqstr.n)

    if isinstance(freqstr, tuple):
        if (com.is_integer(freqstr[0]) and
                com.is_integer(freqstr[1])):
            # e.g., freqstr = (2000, 1)
            return freqstr
        else:
            # e.g., freqstr = ('T', 5)
            try:
                code = _period_str_to_code(freqstr[0])
                stride = freqstr[1]
            except:
                if com.is_integer(freqstr[1]):
                    raise
                code = _period_str_to_code(freqstr[1])
                stride = freqstr[0]
            return code, stride

    if com.is_integer(freqstr):
        return (freqstr, 1)

    base, stride = _base_and_stride(freqstr)
    code = _period_str_to_code(base)

    return code, stride
Example #6
            def _evaluate_numeric_binop(self, other):

                other = self._validate_for_numeric_binop(other, op, opstr)
                attrs = self._get_attributes_dict()
                attrs = self._maybe_update_attributes(attrs)

                if reversed:
                    self, other = other, self

                try:
                    # apply if we have an override
                    if step:
                        rstep = step(self._step, other)

                        # we don't have a representable op
                        # so return a base index
                        if not com.is_integer(rstep) or not rstep:
                            raise ValueError

                    else:
                        rstep = self._step

                    rstart = op(self._start, other)
                    rstop = op(self._stop, other)

                    result = RangeIndex(rstart,
                                        rstop,
                                        rstep,
                                        **attrs)

                    # for compat with numpy / Int64Index
                    # even if we can represent as a RangeIndex, return
                    # as a Float64Index if we have float-like descriptors
                    if not all([com.is_integer(x) for x in
                                [rstart, rstop, rstep]]):
                        result = result.astype('float64')

                    return result

                except (ValueError, TypeError, AttributeError):
                    pass

                # convert to Int64Index ops
                if isinstance(self, RangeIndex):
                    self = self.values
                if isinstance(other, RangeIndex):
                    other = other.values

                return Index(op(self, other), **attrs)
Example #7
    def test_quantile_interpolation_dtype(self):
        # GH #10174
        if _np_version_under1p9:
            raise nose.SkipTest("Numpy version is under 1.9")

        from numpy import percentile

        # interpolation = lower
        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
        self.assertEqual(q, percentile(np.array([1, 3, 4]), 50))
        self.assertTrue(com.is_integer(q))

        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
        self.assertEqual(q, percentile(np.array([1, 3, 4]), 50))
        self.assertTrue(com.is_integer(q))
Example #8
    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        start=None,
        end=None,
        periods=None,
        copy=False,
        name=None,
        tz=None,
        **kwargs
    ):

        freq = frequencies.get_standard_freq(freq)

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError("Periods must be a number, got %s" % str(periods))

        if data is None:
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
            else:
                data, freq = cls._generate_range(start, end, periods, freq, kwargs)
        else:
            ordinal, freq = cls._from_arraylike(data, freq, tz)
            data = np.array(ordinal, dtype=np.int64, copy=False)

        return cls._simple_new(data, name=name, freq=freq)
Example #9
    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        This function should be overloaded in subclasses that allow non-trivial
        casting on label-slice bounds, e.g. datetime-like indices allowing
        strings containing formatted datetimes.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : string / None

        Returns
        -------
        label :  object

        Notes
        -----
        Value of `side` parameter should be validated in caller.

        """

        # we are a numeric index, so we accept
        # integer/floats directly
        if not (com.is_integer(label) or com.is_float(label)):
            self._invalid_indexer('slice', label)

        return label
Example #10
    def delete(self, loc):
        """
        Make a new TimedeltaIndex with passed location(s) deleted.

        Parameters
        ----------
        loc: int, slice or array of ints
            Indicate which sub-arrays to remove.

        Returns
        -------
        new_index : TimedeltaIndex
        """
        new_tds = np.delete(self.asi8, loc)

        freq = 'infer'
        if is_integer(loc):
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if com.is_list_like(loc):
                loc = lib.maybe_indices_to_slice(
                    com._ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                if (loc.start in (0, None) or loc.stop in (len(self), None)):
                    freq = self.freq

        return TimedeltaIndex(new_tds, name=self.name, freq=freq)
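
A hedged usage sketch of the behaviour implemented above: deleting an endpoint of a regular TimedeltaIndex keeps the frequency, while deleting an interior element leaves it to be re-inferred (and here it cannot be).

import pandas as pd

tdi = pd.timedelta_range('1 day', periods=5, freq='D')
print(tdi.delete(0).freq)   # <Day>: first element removed, spacing still regular
print(tdi.delete(2).freq)   # None: an interior gap breaks the regular spacing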
Example #11
    def _getitem_axis(self, key, axis=0):

        self._has_valid_type(key, axis)
        labels = self.obj._get_axis(axis)
        if isinstance(key, slice):
            return self._get_slice_axis(key, axis=axis)
        elif _is_list_like(key) and not (isinstance(key, tuple) and
                                         isinstance(labels, MultiIndex)):

            if hasattr(key, 'ndim') and key.ndim > 1:
                raise ValueError('Cannot index with multidimensional key')

            return self._getitem_iterable(key, axis=axis)
        else:
            if com.is_integer(key):
                if axis == 0 and isinstance(labels, MultiIndex):
                    try:
                        return self._get_label(key, axis=axis)
                    except (KeyError, TypeError):
                        if self.obj.index.levels[0].is_integer():
                            raise

                # this is the fallback! (for a non-float, non-integer index)
                if not labels.is_floating() and not labels.is_integer():
                    return self._get_loc(key, axis=axis)

            return self._get_label(key, axis=axis)
Example #12
    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string, cast it to timedelta according to resolution.


        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label :  object

        """
        assert kind in ['ix', 'loc', 'getitem', None]

        if isinstance(label, compat.string_types):
            parsed = _coerce_scalar_to_timedelta_type(label, box=True)
            lbound = parsed.round(parsed.resolution)
            if side == 'left':
                return lbound
            else:
                return (lbound + to_offset(parsed.resolution) -
                        Timedelta(1, 'ns'))
        elif is_integer(label) or is_float(label):
            self._invalid_indexer('slice', label)

        return label
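
In practice the casting above is what lets label slices on a TimedeltaIndex be written as strings; a short hedged sketch:

import pandas as pd

s = pd.Series(range(4), index=pd.timedelta_range('1 day', periods=4, freq='D'))
print(s['1 days':'3 days'])   # inclusive label slice; both bounds parsed from strings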
Example #13
def _convert_index(index):
    inferred_type = lib.infer_dtype(index)

    # Let's assume the index is homogeneous
    values = np.asarray(index)

    if inferred_type == 'datetime64':
        converted = values.view('i8')
        return converted, 'datetime64', _tables().Int64Col()
    elif isinstance(values[0], datetime):
        converted = np.array([(time.mktime(v.timetuple()) +
                            v.microsecond / 1E6) for v in values],
                            dtype=np.float64)
        return converted, 'datetime', _tables().Time64Col()
    elif isinstance(values[0], date):
        converted = np.array([time.mktime(v.timetuple()) for v in values],
                            dtype=np.int32)
        return converted, 'date', _tables().Time32Col()
    elif isinstance(values[0], basestring):
        converted = np.array(list(values), dtype=np.str_)
        itemsize = converted.dtype.itemsize
        return converted, 'string', _tables().StringCol(itemsize)
    elif com.is_integer(values[0]):
        # take a guess for now, hope the values fit
        atom = _tables().Int64Col()
        return np.asarray(values, dtype=np.int64), 'integer', atom
    elif com.is_float(values[0]):
        atom = _tables().Float64Col()
        return np.asarray(values, dtype=np.float64), 'float', atom
    else: # pragma: no cover
        atom = _tables().ObjectAtom()
        return np.asarray(values, dtype='O'), 'object', atom
Example #14
def _convert_index(index):
    # Let's assume the index is homogeneous
    values = np.asarray(index)

    if isinstance(values[0], (datetime, date)):
        if isinstance(values[0], datetime):
            kind = 'datetime'
        else:
            kind = 'date'
        converted = np.array([time.mktime(v.timetuple()) for v in values],
                             dtype=np.int64)
        return converted, kind, _tables().Time64Col()
    elif isinstance(values[0], basestring):
        converted = np.array(list(values), dtype=np.str_)
        itemsize = converted.dtype.itemsize
        return converted, 'string', _tables().StringCol(itemsize)
    elif com.is_integer(values[0]):
        # take a guess for now, hope the values fit
        atom = _tables().Int64Col()
        return np.asarray(values, dtype=np.int64), 'integer', atom
    elif com.is_float(values[0]):
        atom = _tables().Float64Col()
        return np.asarray(values, dtype=np.float64), 'float', atom
    else: # pragma: no cover
        atom = _tables().ObjectAtom()
        return np.asarray(values, dtype='O'), 'object', atom
Example #15
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int
        """
        try:
            return self._engine.get_loc(key)
        except KeyError:
            if is_integer(key):
                raise

            try:
                asdt, parsed, reso = parse_time_string(key, self.freq)
                key = asdt
            except TypeError:
                pass

            try:
                key = Period(key, freq=self.freq)
            except ValueError:
                # we cannot construct the Period
                # as we have an invalid type
                raise KeyError(key)
            try:
                return Index.get_loc(self, key.ordinal, method, tolerance)
            except KeyError:
                raise KeyError(key)
Example #16
 def __add__(self, other):
     from pandas.core.index import Index
     from pandas.tseries.tdi import TimedeltaIndex
     from pandas.tseries.offsets import DateOffset
     if isinstance(other, TimedeltaIndex):
         return self._add_delta(other)
     elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
         if hasattr(other, '_add_delta'):
             return other._add_delta(self)
         raise TypeError("cannot add TimedeltaIndex and {typ}"
                         .format(typ=type(other)))
     elif isinstance(other, Index):
         warnings.warn("using '+' to provide set union with "
                       "datetimelike Indexes is deprecated, "
                       "use .union()", FutureWarning, stacklevel=2)
         return self.union(other)
     elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
                             tslib.Timedelta)):
         return self._add_delta(other)
     elif com.is_integer(other):
         return self.shift(other)
     elif isinstance(other, (tslib.Timestamp, datetime)):
         return self._add_datelike(other)
     else:  # pragma: no cover
         return NotImplemented
Example #17
    def _convert_scalar_indexer(self, key, kind=None):
        """
        we don't allow integer or float indexing on datetime-like when using
        loc

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if lib.isscalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ['loc'] and (is_int or is_flt):
                self._invalid_indexer('index', key)
            elif kind in ['ix', 'getitem'] and is_flt:
                self._invalid_indexer('index', key)

        return (super(DatetimeIndexOpsMixin, self)
                ._convert_scalar_indexer(key, kind=kind))
Example #18
    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string, cast it to timedelta according to resolution.


        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : string / None

        Returns
        -------
        label :  object

        """
        if isinstance(label, compat.string_types):
            parsed = _coerce_scalar_to_timedelta_type(label, box=True)
            lbound = parsed.round(parsed.resolution)
            if side == "left":
                return lbound
            else:
                return lbound + _resolution_map[parsed.resolution]() - Timedelta(1, "ns")
        elif is_integer(label) or is_float(label):
            self._invalid_indexer("slice", label)

        return label
Example #19
 def __sub__(self, other):
     from pandas.core.index import Index
     from pandas.tseries.tdi import TimedeltaIndex
     from pandas.tseries.offsets import DateOffset
     if isinstance(other, TimedeltaIndex):
         return self._add_delta(-other)
     elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
         if not isinstance(other, TimedeltaIndex):
             raise TypeError("cannot subtract TimedeltaIndex and {typ}"
                             .format(typ=type(other)))
         return self._add_delta(-other)
     elif isinstance(other, Index):
         warnings.warn("using '-' to provide set differences with "
                       "datetimelike Indexes is deprecated, "
                       "use .difference()", FutureWarning, stacklevel=2)
         return self.difference(other)
     elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
                             tslib.Timedelta)):
         return self._add_delta(-other)
     elif com.is_integer(other):
         return self.shift(-other)
     elif isinstance(other, (tslib.Timestamp, datetime)):
         return self._sub_datelike(other)
     elif isinstance(other, prlib.Period):
         return self._sub_period(other)
     else:  # pragma: no cover
         return NotImplemented
Example #20
    def __new__(cls, data=None, ordinal=None,
                freq=None, start=None, end=None, periods=None,
                copy=False, name=None,
                year=None, month=None, quarter=None, day=None,
                hour=None, minute=None, second=None):

        freq = _freq_mod.get_standard_freq(freq)

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None:
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
            else:
                fields = [year, month, quarter, day, hour, minute, second]
                data, freq = cls._generate_range(start, end, periods,
                                                    freq, fields)
        else:
            ordinal, freq = cls._from_arraylike(data, freq)
            data = np.array(ordinal, dtype=np.int64, copy=False)

        subarr = data.view(cls)
        subarr.name = name
        subarr.freq = freq

        return subarr
Example #21
    def convert(values, unit, axis):
        def try_parse(values):
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif (com.is_integer(values) or com.is_float(values)):
            return values
        elif isinstance(values, compat.string_types):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                values = com._asarray_tuplesafe(values)

            if com.is_integer_dtype(values) or com.is_float_dtype(values):
                return values

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                pass

        return values
Example #22
    def convert(values, unit, axis):
        from pandas.tseries.index import DatetimeIndex

        def try_parse(values):
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif com.is_integer(values) or com.is_float(values):
            return values
        elif isinstance(values, str):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                values = np.array(values, dtype="O")

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                pass

        return values
Example #23
    def get_loc(self, key, method=None):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int
        """
        try:
            return self._engine.get_loc(key)
        except KeyError:
            if is_integer(key):
                raise

            try:
                asdt, parsed, reso = parse_time_string(key, self.freq)
                key = asdt
            except TypeError:
                pass

            key = Period(key, self.freq)
            try:
                return Index.get_loc(self, key.ordinal, method=method)
            except KeyError:
                raise KeyError(key)
Example #24
    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string or a datetime, cast it to Period.ordinal according to
        resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : string / None

        Returns
        -------
        bound : Period or object

        Notes
        -----
        Value of `side` parameter should be validated in caller.

        """
        if isinstance(label, datetime):
            return Period(label, freq=self.freq)
        elif isinstance(label, compat.string_types):
            try:
                _, parsed, reso = parse_time_string(label, self.freq)
                bounds = self._parsed_string_to_bounds(reso, parsed)
                return bounds[0 if side == 'left' else 1]
            except Exception:
                raise KeyError(label)
        elif is_integer(label) or is_float(label):
            self._invalid_indexer('slice',label)

        return label
Example #25
def _coerce_scalar_to_timedelta_type(r, unit='ns'):
    # kludgy here until we have a timedelta scalar
    # handle the numpy < 1.7 case

    def conv(v):
        if _np_version_under1p7:
            return timedelta(microseconds=v/1000.0)
        return np.timedelta64(v)

    if isinstance(r, compat.string_types):
        converter = _get_string_converter(r, unit=unit)
        r = converter()
        r = conv(r)
    elif r == tslib.iNaT:
        return r
    elif isinstance(r, np.timedelta64):
        r = r.astype("m8[{0}]".format(unit.lower()))
    elif is_integer(r):
        r = tslib.cast_from_unit(r, unit)
        r = conv(r)

    if _np_version_under1p7:
        if not isinstance(r, timedelta):
            raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
        if compat.PY3:
            # convert to microseconds in timedelta64
            r = np.timedelta64(int(r.total_seconds()*1e9 + r.microseconds*1000))
        else:
            return r

    if isinstance(r, timedelta):
        r = np.timedelta64(r)
    elif not isinstance(r, np.timedelta64):
        raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
    return r.astype('timedelta64[ns]')
Example #26
def _ensure_datetime64(other):
    if isinstance(other, np.datetime64):
        return other
    elif com.is_integer(other):
        return np.int64(other).view("M8[us]")
    else:
        raise TypeError(other)
Example #27
 def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
     freq = getattr(self, 'freqstr',
                    getattr(self, 'inferred_freq', None))
     if is_integer(key) or is_float(key):
         self._invalid_indexer('slice', key)
     loc = self._partial_td_slice(key, freq, use_lhs=use_lhs,
                                  use_rhs=use_rhs)
     return loc
Example #28
def _maybe_get_tz(tz):
    if isinstance(tz, compat.string_types):
        import pytz
        tz = pytz.timezone(tz)
    if com.is_integer(tz):
        import pytz
        tz = pytz.FixedOffset(tz / 60)
    return tz
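
The integer branch above interprets the value as an offset in seconds and passes minutes to pytz. pytz.FixedOffset is a real pytz API taking minutes east of UTC; a tiny illustration:

import pytz

print(pytz.FixedOffset(330))    # pytz.FixedOffset(330), i.e. UTC+05:30
print(pytz.FixedOffset(-300))   # UTC-05:00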
Example #29
 def _convert_key(self, key):
     """ require  integer args (and convert to label arguments) """
     ckey = []
     for a, i in zip(self.obj.axes,key):
         if not com.is_integer(i):
             raise ValueError("iAt based indexing can only have integer indexers")
         ckey.append(a[i])
     return ckey
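
This check is what makes the public .iat accessor purely positional; a quick hedged usage example:

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
print(df.iat[0, 1])   # 3.0 -- row 0, column 1, by position
# df.iat['a', 0]      # would raise: iAt-based indexing only accepts integers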
Example #30
def _offset(window, center):
    if not com.is_integer(window):
        window = len(window)
    offset = (window - 1) / 2. if center else 0
    try:
        return int(offset)
    except:
        return offset.astype(int)
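
Working through the arithmetic above as a standalone sketch (the name window_offset and the explicit type check are assumptions; the original relies on com.is_integer):

import numpy as np

def window_offset(window, center):
    # A non-integer "window" is taken to be an array of weights, so use its length.
    if not isinstance(window, (int, np.integer)):
        window = len(window)
    return int((window - 1) / 2.0) if center else 0

print(window_offset(5, center=True))           # 2
print(window_offset(5, center=False))          # 0
print(window_offset(np.ones(4), center=True))  # 1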
Example #31
def rolling_window(arg,
                   window=None,
                   win_type=None,
                   min_periods=None,
                   freq=None,
                   center=False,
                   mean=True,
                   time_rule=None,
                   axis=0,
                   **kwargs):
    """
    Applies a moving window of type ``win_type`` and size ``window``
    on the data.

    Parameters
    ----------
    arg : Series, DataFrame
    window : int or ndarray
        Weighting window specification. If the window is an integer, then it is
        treated as the window length and win_type is required
    win_type : str, default None
        Window type (see Notes)
    min_periods : int, default None
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    freq : string or DateOffset object, optional (default None)
        Frequency to conform the data to before computing the statistic. Specified
        as a frequency string or DateOffset object. `time_rule` is a legacy alias
        for `freq`.
    center : boolean, default False
        Whether the label should correspond with center of window
    mean : boolean, default True
        If True computes weighted mean, else weighted sum
    axis : {0, 1}, default 0

    Returns
    -------
    y : type of input argument

    Notes
    -----
    The recognized window types are:

    * ``boxcar``
    * ``triang``
    * ``blackman``
    * ``hamming``
    * ``bartlett``
    * ``parzen``
    * ``bohman``
    * ``blackmanharris``
    * ``nuttall``
    * ``barthann``
    * ``kaiser`` (needs beta)
    * ``gaussian`` (needs std)
    * ``general_gaussian`` (needs power, width)
    * ``slepian`` (needs width).
    
    By default, the result is set to the right edge of the window. This can be
    changed to the center of the window by setting ``center=True``.

    The `freq` keyword is used to conform time series data to a specified
    frequency by resampling the data. This is done with the default parameters
    of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
    """
    if isinstance(window, (list, tuple, np.ndarray)):
        if win_type is not None:
            raise ValueError(('Do not specify window type if using custom '
                              'weights'))
        window = com._asarray_tuplesafe(window).astype(float)
    elif com.is_integer(window):  # window size
        if win_type is None:
            raise ValueError('Must specify window type')
        try:
            import scipy.signal as sig
        except ImportError:
            raise ImportError('Please install scipy to generate window weight')
        win_type = _validate_win_type(win_type, kwargs)  # may pop from kwargs
        window = sig.get_window(win_type, window).astype(float)
    else:
        raise ValueError('Invalid window %s' % str(window))

    minp = _use_window(min_periods, len(window))

    arg = _conv_timerule(arg, freq, time_rule)
    return_hook, values = _process_data_structure(arg)

    f = lambda x: algos.roll_window(x, window, minp, avg=mean)
    result = np.apply_along_axis(f, axis, values)

    rs = return_hook(result)
    if center:
        rs = _center_window(rs, len(window), axis)
    return rs
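
For reference, the module-level rolling_window function shown above was a legacy API; a hedged sketch of the equivalent call with the modern rolling() accessor (scipy is needed for the window weights):

import numpy as np
import pandas as pd

s = pd.Series(np.arange(10, dtype=float))

# Weighted moving average with a Gaussian window of length 5, label at the centre.
smoothed = s.rolling(window=5, win_type='gaussian', center=True).mean(std=2)
print(smoothed)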
Example #32
def plot_lines(df, x_col, y_col, colorby_col=None, splitby_col=None,
               color_map=None, use_suptitle=True, use_subplots=False):
    """Plot a metric of mapreduce as a function of parameters.
    Data are first grouped by split_by to produce one figure per value.
    In each figure, data are grouped by colorby_col to create one line per
    value.
    If you have other parameters you must first group the results according to
    this parameter and then pass them to this function.
    We don't allow a lot of plot options but they can be changed with
    matplotlib API. The only option is the colormap (because line plot don't
    use it so we emulate that functionality).
    However we try to be a bit smart on axis labels.

    Parameters
    ----------
    df: the dataframe containing the results.

    x_col: column name (or index level) of the x-axis.

    y_col: column name of the y-axis (it makes no sense to have an index level
    here).

    colorby_col: column name (or index level) that defines the colored lines.
    One line per value or level.

    splitby_col: column name (or index level) that defines how to split figures.
    One figure per value or level (see use_subplots).

    color_map: color map to use (if None, use default colors). It's a
    dictionary whose keys are the values of colorby_col and whose values are
    matplotlib colors.

    use_suptitle: if True, use values of splitby_col as suptitle (it's hard to
    tune).

    use_subplots: create a subplot instead of a figure for each value of
    splitby_col

    Returns
    -------
    handles: dictionary of figure handles.
    The key is the value of splitby_col and the value is the figure handle (or
    the subplot if use_subplots is True).
    """
    # pandas.DataFrame.plot doesn't work properly if x_col is an index
    import pandas.core.common as com
    x_col_is_index = com.is_integer(x_col)
    # x_name is the name of the column/index used for x axis
    x_name = df.index.names[x_col] if x_col_is_index \
                                   else x_col
    colorby_col_is_index = com.is_integer(colorby_col)
    # colorby_name is the name of the column/index used for lines
    colorby_name = df.index.names[colorby_col] if colorby_col_is_index \
                                               else colorby_col
    # Not useful for now
    splitby_col_is_index = com.is_integer(splitby_col)

    # Labels (note that y_col is supposed to be a good label here)
    x_label = x_name
    colorby_label = colorby_name

    try:
        # Try to group by column name
        fig_groups = df.groupby(splitby_col)
    except KeyError:
        try:
            # Try to group by index level
            fig_groups = df.groupby(level=splitby_col)
        except:
            raise Exception("Cannot group by splitby_col.")
    handles = {}
    fig_created = False
    for i, (splitby_col_val, splitby_col_group) in enumerate(fig_groups, 1):
        if use_subplots:
            if not fig_created:
                h = plt.figure()
                n_plots = len(fig_groups)
                n_rows = math.floor(math.sqrt(n_plots))
                n_cols = math.ceil(float(n_plots) / float(n_rows))
                fig_created = True
            handles[splitby_col_val] = plt.subplot(n_rows, n_cols, i)
        else:
            h = plt.figure()
            handles[splitby_col_val] = h
        if use_suptitle:
            plt.suptitle(splitby_col_val)
        try:
            # Try to group by column name
            color_groups = splitby_col_group.groupby(colorby_col)
        except KeyError:
            try:
                # Try to group by index level
                color_groups = splitby_col_group.groupby(level=colorby_col)
            except:
                raise Exception("Cannot group by colorby_col.")
        for colorby_col_val, colorby_col_group in color_groups:
            if x_col_is_index:
                # Remove index: x_col is now referred as x_name
                colorby_col_group.reset_index(level=x_col, inplace=True)
            colorby_col_group.sort(x_name, inplace=True)
            if color_map is None:
                colorby_col_group.plot(x=x_name, y=y_col,
                                       label=colorby_col_val)
            else:
                colorby_col_group.plot(x=x_name, y=y_col,
                                       label=colorby_col_val,
                                       color=color_map[colorby_col_val])
            plt.xlabel(x_label)
            plt.ylabel(y_col)
        plt.legend(title=colorby_label)
    return handles
Example #33
 def __add__(self, other):
     if com.is_integer(other):
         return Period(ordinal=self.ordinal + other, freq=self.freq)
     else:  # pragma: no cover
         raise TypeError(other)
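
The integer addition above is what makes ordinary Period arithmetic work; for example:

import pandas as pd

p = pd.Period('2014Q1', freq='Q')
print(p + 1)   # 2014Q2
print(p + 4)   # 2015Q1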
Example #34
    def __init__(self, value=None, freq=None, ordinal=None,
                 year=None, month=1, quarter=None, day=1,
                 hour=0, minute=0, second=0):
        # freq points to a tuple (base, mult);  base is one of the defined
        # periods such as A, Q, etc. Every five minutes would be, e.g.,
        # ('T', 5) but may be passed in as a string like '5T'

        self.freq = None

        # ordinal is the period offset from the gregorian proleptic epoch
        self.ordinal = None

        if ordinal is not None and value is not None:
            raise ValueError(("Only value or ordinal but not both should be "
                              "given but not both"))
        elif ordinal is not None:
            if not com.is_integer(ordinal):
                raise ValueError("Ordinal must be an integer")
            if freq is None:
                raise ValueError('Must supply freq for ordinal value')
            self.ordinal = ordinal

        elif value is None:
            if freq is None:
                raise ValueError("If value is None, freq cannot be None")

            self.ordinal = _ordinal_from_fields(year, month, quarter, day,
                                                hour, minute, second, freq)

        elif isinstance(value, Period):
            other = value
            if freq is None or _gfc(freq) == _gfc(other.freq):
                self.ordinal = other.ordinal
                freq = other.freq
            else:
                converted = other.asfreq(freq)
                self.ordinal = converted.ordinal

        elif isinstance(value, compat.string_types) or com.is_integer(value):
            if com.is_integer(value):
                value = str(value)

            dt, freq = _get_date_and_freq(value, freq)

        elif isinstance(value, datetime):
            dt = value
            if freq is None:
                raise ValueError('Must supply freq for datetime value')
        elif isinstance(value, date):
            dt = datetime(year=value.year, month=value.month, day=value.day)
            if freq is None:
                raise ValueError('Must supply freq for datetime value')
        else:
            msg = "Value must be Period, string, integer, or datetime"
            raise ValueError(msg)

        base, mult = _gfc(freq)
        if mult != 1:
            # TODO: Better error message - this is slightly confusing
            raise ValueError('Only mult == 1 supported')

        if self.ordinal is None:
            self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day,
                                                dt.hour, dt.minute, dt.second, dt.microsecond, 0,
                                                base)

        self.freq = _freq_mod._get_freq_str(base)
Example #35
    def __new__(cls,
                data=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                copy=False,
                name=None,
                tz=None,
                verify_integrity=True,
                normalize=False,
                **kwds):

        dayfirst = kwds.pop('dayfirst', None)
        yearfirst = kwds.pop('yearfirst', None)
        warn = False
        if 'offset' in kwds and kwds['offset']:
            freq = kwds['offset']
            warn = True

        freq_infer = False
        if not isinstance(freq, DateOffset):
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if warn:
            import warnings
            warnings.warn(
                "parameter 'offset' is deprecated, "
                "please use 'freq' instead", FutureWarning)

        offset = freq

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        if data is None:
            return cls._generate(start,
                                 end,
                                 periods,
                                 name,
                                 offset,
                                 tz=tz,
                                 normalize=normalize)

        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError('DatetimeIndex() must be called with a '
                                 'collection of some kind, %s was passed' %
                                 repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype='O')

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data,
                                        offset,
                                        dayfirst=dayfirst,
                                        yearfirst=yearfirst)
            else:
                data = tools.to_datetime(data)
                data.offset = offset
                if isinstance(data, DatetimeIndex):
                    if name is not None:
                        data.name = name

                    if tz is not None:
                        return data.tz_localize(tz)

                    return data

        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data,
                                      offset,
                                      dayfirst=dayfirst,
                                      yearfirst=yearfirst)
        elif issubclass(data.dtype.type, np.datetime64):
            if isinstance(data, DatetimeIndex):
                if tz is None:
                    tz = data.tz

                subarr = data.values

                if offset is None:
                    offset = data.offset
                    verify_integrity = False
            else:
                if data.dtype != _NS_DTYPE:
                    subarr = lib.cast_to_nanoseconds(data)
                else:
                    subarr = data
        elif data.dtype == _INT64_DTYPE:
            if isinstance(data, Int64Index):
                raise TypeError('cannot convert Int64Index->DatetimeIndex')
            if copy:
                subarr = np.asarray(data, dtype=_NS_DTYPE)
            else:
                subarr = data.view(_NS_DTYPE)
        else:
            try:
                subarr = tools.to_datetime(data)
            except ValueError:
                # tz aware
                subarr = tools.to_datetime(data, utc=True)

            if not np.issubdtype(subarr.dtype, np.datetime64):
                raise TypeError('Unable to convert %s to datetime dtype' %
                                str(data))

        if isinstance(subarr, DatetimeIndex):
            if tz is None:
                tz = subarr.tz
        else:
            if tz is not None:
                tz = tools._maybe_get_tz(tz)

                if (not isinstance(data, DatetimeIndex)
                        or getattr(data, 'tz', None) is None):
                    # Convert tz-naive to UTC
                    ints = subarr.view('i8')
                    subarr = lib.tz_localize_to_utc(ints, tz)

                subarr = subarr.view(_NS_DTYPE)

        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        if verify_integrity and len(subarr) > 0:
            if offset is not None and not freq_infer:
                inferred = subarr.inferred_freq
                if inferred != offset.freqstr:
                    raise ValueError('Dates do not conform to passed '
                                     'frequency')

        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.offset = to_offset(inferred)

        return subarr
Example #36
    def _setitem_with_indexer(self, indexer, value):

        self._has_valid_setitem_indexer(indexer)

        # also has the side effect of consolidating in-place
        from pandas import Panel, DataFrame, Series

        # maybe partial set
        take_split_path = self.obj._is_mixed_type
        if isinstance(indexer, tuple):
            nindexer = []
            for i, idx in enumerate(indexer):
                if isinstance(idx, dict):

                    # reindex the axis to the new value
                    # and set inplace
                    key, _ = _convert_missing_indexer(idx)

                    # if this is the items axes, then take the main missing path
                    # first; this correctly sets the dtype and avoids cache issues
                    # essentially this separates out the block that is needed to possibly
                    # be modified
                    if self.ndim > 1 and i == self.obj._info_axis_number:

                        # add the new item, and set the value
                        new_indexer = _convert_from_missing_indexer_tuple(
                            indexer)
                        self.obj[key] = np.nan
                        self.obj.loc[new_indexer] = value
                        return self.obj

                    # reindex the axis
                    index = self.obj._get_axis(i)
                    labels = _safe_append_to_index(index, key)
                    self.obj._data = self.obj.reindex_axis(labels, i)._data

                    if isinstance(labels, MultiIndex):
                        self.obj.sortlevel(inplace=True)
                        labels = self.obj._get_axis(i)

                    nindexer.append(labels.get_loc(key))

                else:
                    nindexer.append(idx)

            indexer = tuple(nindexer)
        else:

            indexer, missing = _convert_missing_indexer(indexer)

            if missing:

                # reindex the axis to the new value
                # and set inplace
                if self.ndim == 1:
                    index = self.obj.index
                    if len(index) == 0:
                        new_index = Index([indexer])
                    else:
                        new_index = _safe_append_to_index(index, indexer)

                    new_values = np.concatenate([self.obj.values, [value]])
                    self.obj._data = self.obj._constructor(new_values,
                                                           index=new_index,
                                                           name=self.obj.name)
                    return self.obj

                elif self.ndim == 2:
                    index = self.obj._get_axis(0)
                    labels = _safe_append_to_index(index, indexer)
                    self.obj._data = self.obj.reindex_axis(labels, 0)._data
                    return getattr(self.obj,
                                   self.name).__setitem__(indexer, value)

                # set using setitem (Panel and > dims)
                elif self.ndim >= 3:
                    return self.obj.__setitem__(indexer, value)

        # set
        info_axis = self.obj._info_axis_number
        item_labels = self.obj._get_axis(info_axis)

        # if we have a complicated setup, take the split path
        if isinstance(indexer, tuple) and any(
            [isinstance(ax, MultiIndex) for ax in self.obj.axes]):
            take_split_path = True

        # align and set the values
        if take_split_path:

            if not isinstance(indexer, tuple):
                indexer = self._tuplify(indexer)

            if isinstance(value, ABCSeries):
                value = self._align_series(indexer, value)

            info_idx = indexer[info_axis]
            if com.is_integer(info_idx):
                info_idx = [info_idx]
            labels = item_labels[info_idx]

            # if we have a partial multiindex, then need to adjust the plane indexer here
            if len(labels) == 1 and isinstance(self.obj[labels[0]].index,
                                               MultiIndex):
                index = self.obj[labels[0]].index
                idx = indexer[:info_axis][0]
                try:
                    if idx in index:
                        idx = index.get_loc(idx)
                except:
                    pass
                plane_indexer = tuple([idx]) + indexer[info_axis + 1:]
                lplane_indexer = _length_of_indexer(plane_indexer[0], index)

                if is_list_like(value) and lplane_indexer != len(value):
                    raise ValueError(
                        "cannot set using a multi-index selection indexer with a different length than the value"
                    )

            # non-mi
            else:
                plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
                if info_axis > 0:
                    plane_axis = self.obj.axes[:info_axis][0]
                    lplane_indexer = _length_of_indexer(
                        plane_indexer[0], plane_axis)
                else:
                    lplane_indexer = 0

            def setter(item, v):
                s = self.obj[item]
                pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

                # set the item, possibly having a dtype change
                s = s.copy()
                s._data = s._data.setitem(pi, v)
                self.obj[item] = s

            def can_do_equal_len():
                """ return True if we have an equal len settable """
                if not len(labels) == 1:
                    return False

                l = len(value)
                item = labels[0]
                index = self.obj[item].index

                # equal len list/ndarray
                if len(index) == l:
                    return True
                elif lplane_indexer == l:
                    return True

                return False

            if _is_list_like(value):

                # we have an equal len Frame
                if isinstance(value, ABCDataFrame) and value.ndim > 1:

                    for item in labels:

                        # align to
                        if item in value:
                            v = value[item]
                            v = v.reindex(self.obj[item].index & v.index)
                            setter(item, v.values)
                        else:
                            setter(item, np.nan)

                # we have an equal len ndarray to our labels
                elif isinstance(value, np.ndarray) and value.ndim == 2:
                    if len(labels) != value.shape[1]:
                        raise ValueError(
                            'Must have equal len keys and value when'
                            ' setting with an ndarray')

                    for i, item in enumerate(labels):
                        setter(item, value[:, i])

                # we have an equal len list/ndarray
                elif can_do_equal_len():
                    setter(labels[0], value)

                # per label values
                else:

                    for item, v in zip(labels, value):
                        setter(item, v)
            else:

                # scalar
                for item in labels:
                    setter(item, value)

        else:
            if isinstance(indexer, tuple):
                indexer = _maybe_convert_ix(*indexer)

            if isinstance(value, ABCSeries):
                value = self._align_series(indexer, value)

            elif isinstance(value, ABCDataFrame):
                value = self._align_frame(indexer, value)

            if isinstance(value, ABCPanel):
                value = self._align_panel(indexer, value)

            self.obj._data = self.obj._data.setitem(indexer, value)
Example #37
    def _convert_to_indexer(self, obj, axis=0, is_setter=False):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on an ndarray

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        "In the face of ambiguity, refuse the temptation to guess."
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing
        """
        labels = self.obj._get_axis(axis)

        # if we are a scalar indexer and not type correct raise
        obj = self._convert_scalar_indexer(obj, axis)

        # see if we are positional in nature
        is_int_index = labels.is_integer()
        is_int_positional = com.is_integer(obj) and not is_int_index

        # if we are a label return me
        try:
            return labels.get_loc(obj)
        except (KeyError, TypeError):
            pass
        except (ValueError):
            if not is_int_positional:
                raise

        # a positional
        if is_int_positional:

            # if we are setting and its not a valid location
            # its an insert which fails by definition
            if is_setter:
                if obj >= len(self.obj) and not isinstance(labels, MultiIndex):
                    raise ValueError(
                        "cannot set by positional indexing with enlargement")

            return obj

        if isinstance(obj, slice):
            return self._convert_slice_indexer(obj, axis)

        elif _is_list_like(obj):
            if com._is_bool_indexer(obj):
                obj = _check_bool_indexer(labels, obj)
                inds, = obj.nonzero()
                return inds
            else:
                if isinstance(obj, Index):
                    objarr = obj.values
                else:
                    objarr = _asarray_tuplesafe(obj)

                # If have integer labels, defer to label-based indexing
                if is_integer_dtype(objarr) and not is_int_index:
                    if labels.inferred_type != 'integer':
                        objarr = np.where(objarr < 0,
                                          len(labels) + objarr, objarr)
                    return objarr

                # this is not the most robust, but...
                if (isinstance(labels, MultiIndex)
                        and not isinstance(objarr[0], tuple)):
                    level = 0
                    _, indexer = labels.reindex(objarr, level=level)

                    check = labels.levels[0].get_indexer(objarr)
                else:
                    level = None

                    # unique index
                    if labels.is_unique:
                        indexer = check = labels.get_indexer(objarr)

                    # non-unique (dups)
                    else:
                        indexer, missing = labels.get_indexer_non_unique(
                            objarr)
                        check = indexer

                mask = check == -1
                if mask.any():

                    # mi here
                    if isinstance(obj, tuple) and is_setter:
                        return {'key': obj}
                    raise KeyError('%s not in index' % objarr[mask])

                return indexer

        else:
            try:
                return labels.get_loc(obj)
            except (KeyError):

                # allow a not found key only if we are a setter
                if not is_list_like(obj) and is_setter:
                    return {'key': obj}
                raise
Example #38
 def _has_valid_type(self, key, axis):
     return (isinstance(key, slice) or com.is_integer(key) or
             com._is_bool_indexer(key) or _is_list_like(key))
Example #39
    def __init__(self, value=None, freq=None, ordinal=None,
                 year=None, month=1, quarter=None, day=1,
                 hour=0, minute=0, second=0):
        """
        Represents a period of time

        Parameters
        ----------
        value : Period or basestring, default None
            The time period represented (e.g., '4Q2005')
        freq : str, default None
            e.g., 'B' for businessday, ('T', 5) or '5T' for 5 minutes
        year : int, default None
        month : int, default 1
        quarter : int, default None
        day : int, default 1
        hour : int, default 0
        minute : int, default 0
        second : int, default 0
        """
        # freq points to a tuple (base, mult);  base is one of the defined
        # periods such as A, Q, etc. Every five minutes would be, e.g.,
        # ('T', 5) but may be passed in as a string like '5T'

        self.freq = None

        # ordinal is the period offset from the gregorian proleptic epoch
        self.ordinal = None

        if ordinal is not None and value is not None:
            raise ValueError("Only value or ordinal should be given, "
                             "but not both")
        elif ordinal is not None:
            if not com.is_integer(ordinal):
                raise ValueError("Ordinal must be an integer")
            if freq is None:
                raise ValueError('Must supply freq for ordinal value')
            self.ordinal = ordinal

        elif value is None:
            if freq is None:
                raise ValueError("If value is None, freq cannot be None")

            self.ordinal = _ordinal_from_fields(year, month, quarter, day,
                                                hour, minute, second, freq)

        elif isinstance(value, Period):
            other = value
            if freq is None or _gfc(freq) == _gfc(other.freq):
                self.ordinal = other.ordinal
                freq = other.freq
            else:
                converted = other.asfreq(freq)
                self.ordinal = converted.ordinal

        elif isinstance(value, basestring) or com.is_integer(value):
            if com.is_integer(value):
                value = str(value)

            dt, freq = _get_date_and_freq(value, freq)

        elif isinstance(value, datetime):
            dt = value
            if freq is None:
                raise ValueError('Must supply freq for datetime value')
        elif isinstance(value, date):
            dt = datetime(year=value.year, month=value.month, day=value.day)
            if freq is None:
                raise ValueError('Must supply freq for datetime value')
        else:
            msg = "Value must be Period, string, integer, or datetime"
            raise ValueError(msg)

        base, mult = _gfc(freq)
        if mult != 1:
            raise ValueError('Only mult == 1 supported')

        if self.ordinal is None:
            self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day,
                                                dt.hour, dt.minute, dt.second,
                                                base)

        self.freq = _freq_mod._get_freq_str(base)
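
A brief usage sketch of the constructor above; it assumes pandas is importable as pd and that the public pd.Period constructor exposes the same value / fields / ordinal paths shown here.

import pandas as pd

p1 = pd.Period('4Q2005')                          # parsed from a string value
p2 = pd.Period(year=2005, quarter=4, freq='Q')    # built from date fields
p3 = pd.Period(ordinal=p1.ordinal, freq='Q')      # rebuilt from its ordinal
print(p1 == p2 == p3)                             # True
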
Beispiel #40
0
 def _is_valid_index(x):
     return (com.is_integer(x) or com.is_float(x)
             and np.allclose(x, int(x), rtol=_eps, atol=0))
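
Standalone sketch of the float-tolerant variant above. _eps is internal to pandas, so a small machine epsilon is assumed here purely for illustration.

import numpy as np

_eps = np.finfo('f4').eps  # assumed tolerance; not the actual pandas value

def is_valid_index_sketch(x):
    is_int = isinstance(x, (int, np.integer)) and not isinstance(x, bool)
    is_flt = isinstance(x, (float, np.floating))
    return is_int or (is_flt and np.allclose(x, int(x), rtol=_eps, atol=0))

print(is_valid_index_sketch(3))    # True
print(is_valid_index_sketch(3.0))  # True  -- a float within tolerance of an int
print(is_valid_index_sketch(3.5))  # False
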
Beispiel #41
0
 def _is_valid_index(x):
     return com.is_integer(x)
Beispiel #42
0
    def __init__(self, f, delimiter=None, dialect=None, names=None, header=0,
                 index_col=None, na_values=None, keep_default_na=True,
                 thousands=None,
                 comment=None, parse_dates=False, keep_date_col=False,
                 date_parser=None, dayfirst=False,
                 chunksize=None, skiprows=None, skip_footer=0, converters=None,
                 verbose=False, encoding=None, squeeze=False):
        """
        Workhorse function for processing a nested list into a DataFrame

        Should be replaced by np.genfromtxt eventually?
        """
        self.data = None
        self.buf = []
        self.pos = 0
        self.names = list(names) if names is not None else names
        self.header = header
        self.index_col = index_col
        self.chunksize = chunksize
        self.passed_names = names is not None
        self.encoding = encoding

        self.parse_dates = parse_dates
        self.keep_date_col = keep_date_col
        self.date_parser = date_parser
        self.dayfirst = dayfirst

        if com.is_integer(skiprows):
            skiprows = range(skiprows)
        self.skiprows = set() if skiprows is None else set(skiprows)

        self.skip_footer = skip_footer
        self.delimiter = delimiter
        self.dialect = dialect
        self.verbose = verbose

        if converters is not None:
            assert(isinstance(converters, dict))
            self.converters = converters
        else:
            self.converters = {}

        assert(self.skip_footer >= 0)

        self.keep_default_na = keep_default_na
        if na_values is None and keep_default_na:
            self.na_values = _NA_VALUES
        elif isinstance(na_values, dict):
            if keep_default_na:
                for k, v in na_values.iteritems():
                    v = set(list(v)) | _NA_VALUES
                    na_values[k] = v
            self.na_values = na_values
        else:
            na_values = set(list(na_values))
            if keep_default_na:
                na_values = na_values | _NA_VALUES
            self.na_values = na_values

        self.thousands = thousands
        self.comment = comment
        self._comment_lines = []

        if hasattr(f, 'readline'):
            self._make_reader(f)
        else:
            self.data = f
        self.columns = self._infer_columns()

        # needs to be cleaned/refactored
        # multiple date column thing turning into a real spaghetti factory

        # get popped off for index
        self.orig_columns = list(self.columns)

        self.index_name = None
        self._name_processed = False
        if not self._has_complex_date_col:
            self.index_name = self._get_index_name()
            self._name_processed = True
        self._first_chunk = True

        self.squeeze = squeeze
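
The skiprows / na_values handling above can be summarized with a small standalone sketch; _NA_VALUES is internal to pandas, so a reduced stand-in set is assumed here.

_NA_VALUES = set(['', 'NA', 'NaN'])  # assumed subset of the pandas defaults

def normalize_skiprows(skiprows):
    if isinstance(skiprows, int):        # com.is_integer in the original
        skiprows = range(skiprows)       # an int means "skip the first N rows"
    return set() if skiprows is None else set(skiprows)

def normalize_na_values(na_values, keep_default_na=True):
    if na_values is None:
        return _NA_VALUES if keep_default_na else set()
    na_values = set(list(na_values))
    return na_values | _NA_VALUES if keep_default_na else na_values

print(normalize_skiprows(3))             # {0, 1, 2}
print(normalize_na_values(['missing']))  # the defaults plus 'missing'
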
Beispiel #43
0
 def _can_hold_element(self, element):
     return com.is_integer(element) or isinstance(element, datetime)
Beispiel #44
0
    def test_is_integer(self):
        self.assertTrue(com.is_integer(1))
        self.assertTrue(com.is_integer(np.int64(1)))

        self.assertFalse(com.is_integer(True))
        self.assertFalse(com.is_integer(1.1))
        self.assertFalse(com.is_integer(1 + 3j))
        self.assertFalse(com.is_integer(np.bool(False)))
        self.assertFalse(com.is_integer(np.bool_(False)))
        self.assertFalse(com.is_integer(np.float64(1.1)))
        self.assertFalse(com.is_integer(np.complex128(1 + 3j)))
        self.assertFalse(com.is_integer(np.nan))
        self.assertFalse(com.is_integer(None))
        self.assertFalse(com.is_integer('x'))
        self.assertFalse(com.is_integer(datetime(2011, 1, 1)))
        self.assertFalse(com.is_integer(np.datetime64('2011-01-01')))
        self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01')))
        self.assertFalse(
            com.is_integer(pd.Timestamp('2011-01-01', tz='US/Eastern')))
        self.assertFalse(com.is_integer(timedelta(1000)))
        self.assertFalse(com.is_integer(pd.Timedelta('1 days')))

        # questionable
        self.assertTrue(com.is_integer(np.timedelta64(1, 'D')))
Beispiel #45
0
    def __new__(cls,
                data=None,
                unit=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                copy=False,
                name=None,
                closed=None,
                verify_integrity=True,
                **kwargs):

        if isinstance(data, TimedeltaIndex) and freq is None:
            if copy:
                data = data.copy()
            return data

        freq_infer = False
        if not isinstance(freq, DateOffset):

            # if a passed freq is None, don't infer automatically
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and freq is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        if data is None:
            return cls._generate(start,
                                 end,
                                 periods,
                                 name,
                                 freq,
                                 closed=closed)

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        if not isinstance(data, (np.ndarray, Index, ABCSeries)):
            if np.isscalar(data):
                raise ValueError('TimedeltaIndex() must be called with a '
                                 'collection of some kind, %s was passed' %
                                 repr(data))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        # check that we are matching freqs
        if verify_integrity and len(data) > 0:
            if freq is not None and not freq_infer:
                index = cls._simple_new(data, name=name)
                inferred = index.inferred_freq
                if inferred != freq.freqstr:
                    on_freq = cls._generate(index[0], None, len(index), name,
                                            freq)
                    if not np.array_equal(index.asi8, on_freq.asi8):
                        raise ValueError(
                            'Inferred frequency {0} from passed timedeltas does not '
                            'conform to passed frequency {1}'.format(
                                inferred, freq.freqstr))
                index.freq = freq
                return index

        if freq_infer:
            index = cls._simple_new(data, name=name)
            inferred = index.inferred_freq
            if inferred:
                index.freq = to_offset(inferred)
            return index

        return cls._simple_new(data, name=name, freq=freq)
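
The periods coercion near the top of the constructor can be illustrated in isolation: floats are cast to int and anything that is not a number is rejected. This is a sketch using isinstance checks in place of the pandas is_float / is_integer helpers.

import numpy as np

def coerce_periods(periods):
    if periods is None:
        return None
    if isinstance(periods, (float, np.floating)):   # is_float in the original
        return int(periods)
    if isinstance(periods, bool) or not isinstance(periods, (int, np.integer)):
        raise ValueError('Periods must be a number, got %s' % str(periods))
    return periods

print(coerce_periods(5))     # 5
print(coerce_periods(5.0))   # 5
# coerce_periods('5') would raise ValueError
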
Beispiel #46
0
    def _setitem_with_indexer(self, indexer, value):

        # also has the side effect of consolidating in-place
        # mmm, spaghetti

        if self.obj._is_mixed_type:
            if not isinstance(indexer, tuple):
                indexer = self._tuplify(indexer)

            if isinstance(value, ABCSeries):
                value = self._align_series(indexer, value)

            info_axis = self.obj._info_axis_number
            info_idx = indexer[info_axis]

            if com.is_integer(info_idx):
                info_idx = [info_idx]

            plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
            item_labels = self.obj._get_axis(info_axis)

            def setter(item, v):
                s = self.obj[item]
                pi = plane_indexer[0] if len(
                    plane_indexer) == 1 else plane_indexer

                # set the item, possibly having a dtype change
                s = s.copy()
                s._data = s._data.setitem(pi, v)
                self.obj[item] = s

            labels = item_labels[info_idx]

            if _is_list_like(value):

                # we have an equal len Frame
                if isinstance(value, ABCDataFrame) and value.ndim > 1:

                    for item in labels:

                        # align to
                        if item in value:
                            v = value[item]
                            v = v.reindex(self.obj[item].index & v.index)
                            setter(item, v.values)
                        else:
                            setter(item, np.nan)

                # we have an equal len ndarray to our labels
                elif isinstance(value, np.ndarray) and value.ndim == 2:
                    if len(labels) != value.shape[1]:
                        raise ValueError(
                            'Must have equal len keys and value when'
                            ' setting with an ndarray')

                    for i, item in enumerate(labels):
                        setter(item, value[:, i])

                # we have an equal len list/ndarray
                elif len(labels) == 1 and (
                        len(self.obj[labels[0]]) == len(value)
                        or len(plane_indexer[0]) == len(value)):
                    setter(labels[0], value)

                # per label values
                else:

                    for item, v in zip(labels, value):
                        setter(item, v)
            else:

                # scalar
                for item in labels:
                    setter(item, value)

        else:
            if isinstance(indexer, tuple):
                indexer = _maybe_convert_ix(*indexer)

            if isinstance(value, ABCSeries):
                value = self._align_series(indexer, value)

            elif isinstance(value, ABCDataFrame):
                value = self._align_frame(indexer, value)

            if isinstance(value, ABCPanel):
                value = self._align_panel(indexer, value)

            self.obj._data = self.obj._data.setitem(indexer, value)
Beispiel #47
0
    def _convert_to_indexer(self, obj, axis=0):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on an ndarray

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        "In the face of ambiguity, refuse the temptation to guess."
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing
        """
        labels = self.obj._get_axis(axis)
        is_int_index = _is_integer_index(labels)

        if com.is_integer(obj) and not is_int_index:
            return obj

        try:
            return labels.get_loc(obj)
        except (KeyError, TypeError):
            pass

        if isinstance(obj, slice):
            ltype = labels.inferred_type

            if ltype == 'floating':
                int_slice = _is_int_slice(obj)
            else:
                # floats that are within tolerance of int used
                int_slice = _is_index_slice(obj)

            null_slice = obj.start is None and obj.stop is None
            # could have integers in the first level of the MultiIndex
            position_slice = (int_slice
                              and not ltype == 'integer'
                              and not isinstance(labels, MultiIndex))

            start, stop = obj.start, obj.stop

            # last ditch effort: if we are mixed and have integers
            try:
                if 'mixed' in ltype and int_slice:
                    if start is not None:
                        i = labels.get_loc(start)
                    if stop is not None:
                        j = labels.get_loc(stop)
                    position_slice = False
            except KeyError:
                if ltype == 'mixed-integer-float':
                    raise

            if null_slice or position_slice:
                slicer = obj
            else:
                try:
                    i, j = labels.slice_locs(start, stop)
                    slicer = slice(i, j, obj.step)
                except Exception:
                    if _is_index_slice(obj):
                        if labels.inferred_type == 'integer':
                            raise
                        slicer = obj
                    else:
                        raise

            return slicer

        elif _is_list_like(obj):
            if com._is_bool_indexer(obj):
                objarr = _check_bool_indexer(labels, obj)
                return objarr
            else:
                if isinstance(obj, Index):
                    objarr = obj.values
                else:
                    objarr = _asarray_tuplesafe(obj)

                # If have integer labels, defer to label-based indexing
                if _is_integer_dtype(objarr) and not is_int_index:
                    return objarr

                # this is not the most robust, but...
                if (isinstance(labels, MultiIndex) and
                    not isinstance(objarr[0], tuple)):
                    level = 0
                    _, indexer = labels.reindex(objarr, level=level)

                    check = labels.levels[0].get_indexer(objarr)
                else:
                    level = None
                    # XXX
                    if labels.is_unique:
                        indexer = check = labels.get_indexer(objarr)
                    else:
                        mask = np.zeros(len(labels), dtype=bool)
                        lvalues = labels.values
                        for x in objarr:
                            # ugh
                            to_or = lib.map_infer(lvalues, x.__eq__)
                            if not to_or.any():
                                raise KeyError('%s not in index' % str(x))
                            mask |= to_or

                        indexer = check = mask.nonzero()[0]

                mask = check == -1
                if mask.any():
                    raise KeyError('%s not in index' % objarr[mask])

                return indexer
        else:
            return labels.get_loc(obj)
Beispiel #48
0
    def _setitem_with_indexer(self, indexer, value):
        from pandas.core.frame import DataFrame, Series

        # also has the side effect of consolidating in-place

        # mmm, spaghetti

        if self.obj._is_mixed_type:
            if not isinstance(indexer, tuple):
                indexer = self._tuplify(indexer)

            if isinstance(value, Series):
                value = self._align_series(indexer, value)

            het_axis = self.obj._het_axis
            het_idx = indexer[het_axis]

            if com.is_integer(het_idx):
                het_idx = [het_idx]

            plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:]
            item_labels = self.obj._get_axis(het_axis)

            def setter(item, v):
                data = self.obj[item]
                values = data.values
                if np.prod(values.shape):
                    result, changed = com._maybe_upcast_indexer(
                        values, plane_indexer, v,
                        dtype=getattr(data, 'dtype', None))
                    self.obj[item] = result

            labels = item_labels[het_idx]

            if _is_list_like(value):

                # we have an equal len Frame
                if isinstance(value, DataFrame) and value.ndim > 1:

                    for item in labels:

                        # align to
                        if item in value:
                            v = value[item]
                            v = v.reindex(self.obj[item].index & v.index)
                            setter(item, v.values)
                        else:
                            setter(item, np.nan)

                # we have an equal len ndarray
                elif isinstance(value, np.ndarray) and value.ndim == 2:
                    if len(labels) != value.shape[1]:
                        raise ValueError('Must have equal len keys and value when'
                                         ' setting with an ndarray')

                    for i, item in enumerate(labels):
                        setter(item, value[:,i])

                # we have an equal len list/ndarray
                elif len(labels) == 1 and len(self.obj[labels[0]]) == len(value):
                    setter(labels[0], value)

                # per label values
                else:

                    for item, v in zip(labels, value):
                        setter(item, v)
            else:

                # scalar
                for item in labels:
                    setter(item, value)

        else:
            if isinstance(indexer, tuple):
                indexer = _maybe_convert_ix(*indexer)

            if isinstance(value, Series):
                value = self._align_series(indexer, value)

            if isinstance(value, DataFrame):
                value = self._align_frame(indexer, value)

            # 2096
            values = self.obj.values
            if np.prod(values.shape):
                values[indexer] = value
Beispiel #49
0
    def _convert_to_indexer(self, obj, axis=0):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on an ndarray

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        "In the face of ambiguity, refuse the temptation to guess."
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing
        """
        labels = self.obj._get_axis(axis)
        is_int_index = _is_integer_index(labels)

        if com.is_integer(obj) and not is_int_index:
            return obj

        try:
            return labels.get_loc(obj)
        except (KeyError, TypeError):
            pass

        if isinstance(obj, slice):
            ltype = labels.inferred_type

            # in case of providing all floats, use label-based indexing
            float_slice = (labels.inferred_type == 'floating'
                           and _is_float_slice(obj))

            # floats that are within tolerance of int used as positions
            int_slice = _is_index_slice(obj)

            null_slice = obj.start is None and obj.stop is None

            # could have integers in the first level of the MultiIndex,
            # in which case we wouldn't want to do position-based slicing
            position_slice = (int_slice
                              and not ltype == 'integer'
                              and not isinstance(labels, MultiIndex)
                              and not float_slice)

            start, stop = obj.start, obj.stop

            # last ditch effort: if we are mixed and have integers
            try:
                if position_slice and 'mixed' in ltype:
                    if start is not None:
                        i = labels.get_loc(start)
                    if stop is not None:
                        j = labels.get_loc(stop)
                    position_slice = False
            except KeyError:
                if ltype == 'mixed-integer-float':
                    raise

            if null_slice or position_slice:
                indexer = obj
            else:
                try:
                    indexer = labels.slice_indexer(start, stop, obj.step)
                except Exception:
                    if _is_index_slice(obj):
                        if ltype == 'integer':
                            raise
                        indexer = obj
                    else:
                        raise

            return indexer

        elif _is_list_like(obj):
            if com._is_bool_indexer(obj):
                obj = _check_bool_indexer(labels, obj)
                inds, = obj.nonzero()
                return inds
            else:
                if isinstance(obj, Index):
                    objarr = obj.values
                else:
                    objarr = _asarray_tuplesafe(obj)

                # If have integer labels, defer to label-based indexing
                if _is_integer_dtype(objarr) and not is_int_index:
                    if labels.inferred_type != 'integer':
                        objarr = np.where(objarr < 0,
                                          len(labels) + objarr, objarr)
                    return objarr

                # this is not the most robust, but...
                if (isinstance(labels, MultiIndex) and
                        not isinstance(objarr[0], tuple)):
                    level = 0
                    _, indexer = labels.reindex(objarr, level=level)

                    check = labels.levels[0].get_indexer(objarr)
                else:
                    level = None

                    # unique index
                    if labels.is_unique:
                        indexer = check = labels.get_indexer(objarr)

                    # non-unique (dups)
                    else:
                        indexer, missing = labels.get_indexer_non_unique(objarr)
                        check = indexer

                mask = check == -1
                if mask.any():
                    raise KeyError('%s not in index' % objarr[mask])
            
                return indexer

        else:
            return labels.get_loc(obj)
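
The negative-position wrap used for integer keys against a non-integer axis (the np.where branch above) works as follows; the label count here is an arbitrary illustration.

import numpy as np

n_labels = 5                          # pretend the axis holds 5 labels
objarr = np.array([0, -1, -2])        # requested integer positions
wrapped = np.where(objarr < 0, n_labels + objarr, objarr)
print(wrapped)                        # [0 4 3]
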
Beispiel #50
0
 def crit(x):
     try:
         _ = labels.get_loc(x)
         return False
     except KeyError:
         return com.is_integer(x) or x is None
Beispiel #51
0
 def _crit(v):
     return v is None or com.is_integer(v)
Beispiel #52
0
 def __add__(self, other):
     if com.is_integer(other):
         return PeriodIndex(ordinal=self.values + other, freq=self.freq)
     return super(PeriodIndex, self).__add__(other)
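
Usage sketch for the integer __add__ above; it assumes a pandas version where adding an integer to a PeriodIndex shifts every period by that many freq units, which also holds in current releases.

import pandas as pd

pidx = pd.period_range('2000-01', periods=3, freq='M')
print(pidx + 1)   # PeriodIndex(['2000-02', '2000-03', '2000-04'], freq='M')
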
Beispiel #53
0
 def _can_hold_element(self, element):
     return com.is_integer(element)
Beispiel #54
0
    def _convert_to_indexer(self, obj, axis=0):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on an ndarray

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        "In the face of ambiguity, refuse the temptation to guess."
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing
        """
        labels = self.obj._get_axis(axis)

        try:
            return labels.get_loc(obj)
        except (KeyError, TypeError):
            pass

        is_int_index = _is_integer_index(labels)
        if isinstance(obj, slice):

            int_slice = _is_integer_slice(obj)
            null_slice = obj.start is None and obj.stop is None
            # could have integers in the first level of the MultiIndex
            position_slice = (int_slice
                              and not labels.inferred_type == 'integer'
                              and not isinstance(labels, MultiIndex))

            if null_slice or position_slice:
                slicer = obj
            else:
                try:
                    i, j = labels.slice_locs(obj.start, obj.stop)
                    slicer = slice(i, j, obj.step)
                except Exception:
                    if _is_integer_slice(obj):
                        if labels.inferred_type == 'integer':
                            raise
                        slicer = obj
                    else:
                        raise

            return slicer

        elif _is_list_like(obj):
            if com._is_bool_indexer(obj):
                objarr = _check_bool_indexer(labels, obj)
                return objarr
            else:
                objarr = _asarray_tuplesafe(obj)

                # If have integer labels, defer to label-based indexing
                if _is_integer_dtype(objarr) and not is_int_index:
                    return objarr

                indexer = labels.get_indexer(objarr)
                mask = indexer == -1
                if mask.any():
                    raise KeyError('%s not in index' % objarr[mask])

                return indexer
        else:
            if com.is_integer(obj) and not is_int_index:
                return obj
            return labels.get_loc(obj)
Beispiel #55
0
 def __add__(self, other):
     if com.is_integer(other):
         return Period(ordinal=self.ordinal + other, freq=self.freq)
     raise ValueError("Cannot add with non-integer value")
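
And the scalar counterpart: adding an integer to a Period moves it forward by that many freq units, while in the snippet above any non-integer raises ValueError.

import pandas as pd

p = pd.Period('2000-01', freq='M')
print(p + 3)      # Period('2000-04', 'M')
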