Exemple #1
0
def _get_range_edges(axis, begin, end, offset, closed='left', base=0):
    """
    Compute the outer ``(first, last)`` edges used to bin ``axis``.

    Parameters
    ----------
    axis : indexable of datetime-like
        Only ``axis[0]`` and ``axis[-1]`` are read.
    begin, end : datetime-like or None
        Explicit bounds.  When None, the edge is derived from ``axis``.
    offset : basestring or DateOffset
        Strings are converted with ``to_offset``.
    closed : str, default 'left'
        With ``begin`` unset, 'left' rolls ``axis[0]`` back onto the offset;
        any other value steps one full offset before ``axis[0]``.
    base : int, default 0
        Forwarded unchanged to ``_adjust_dates_anchored``.

    Returns
    -------
    tuple
        ``(first, last)``.

    Raises
    ------
    ValueError
        If ``offset`` is not a ``DateOffset`` after string conversion.
    """
    if isinstance(offset, basestring):
        offset = to_offset(offset)

    if not isinstance(offset, DateOffset):
        raise ValueError("Rule not a recognized offset")

    if isinstance(offset, Tick):
        nanos_per_day = _delta_to_nanoseconds(timedelta(1))
        # #1165: ticks that evenly divide a day are anchored to the day start,
        # but only when the caller gave no explicit bounds.
        if (nanos_per_day % offset.nanos == 0
                and begin is None and end is None):
            return _adjust_dates_anchored(axis[0], axis[-1], offset,
                                          closed=closed, base=base)

    # Lower edge: explicit bound wins, otherwise derive it from the data.
    if begin is not None:
        lower = offset.rollback(begin)
    elif closed == 'left':
        lower = offset.rollback(axis[0])
    else:
        lower = axis[0] - offset
    first = Timestamp(lower)

    # Upper edge: explicit bound is rolled forward, data edge gets one offset.
    last = Timestamp(axis[-1] + offset if end is None
                     else offset.rollforward(end))

    return first, last
Exemple #2
0
def _generate_regular_range(start, end, periods, offset):
    """
    Build the raw datetime64 values of a regularly spaced range.

    Parameters
    ----------
    start, end : datetime-like or None
    periods : int or None
    offset : DateOffset
        ``Tick`` offsets are generated arithmetically with ``np.arange``;
        any other offset is expanded through ``generate_range``.

    Returns
    -------
    ndarray
        Array with dtype ``_NS_DTYPE``.

    Raises
    ------
    ValueError
        If fewer than two of ``start``, ``end`` and ``periods`` are given.
    """
    if com._count_not_none(start, end, periods) < 2:
        raise ValueError('Must specify two of start, end, or periods')

    if not isinstance(offset, Tick):
        xdr = generate_range(start=start, end=end,
                             periods=periods, offset=offset)
        return np.array(list(xdr), dtype=_NS_DTYPE)

    step = offset.nanos
    if periods is None:
        lo = Timestamp(start).value
        hi = Timestamp(end).value
        # np.arange excludes its stop value, so push the stop up to the next
        # multiple of the step strictly above ``end``.
        hi += step - hi % step
    elif start is not None:
        lo = Timestamp(start).value
        hi = lo + periods * step
    elif end is not None:
        hi = Timestamp(end).value + step
        lo = hi - periods * step
    else:
        raise NotImplementedError

    return np.arange(lo, hi, step, dtype=np.int64).view(_NS_DTYPE)
Exemple #3
0
    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing.

        The plain ``Index.get_value`` lookup is tried first.  If it raises
        ``KeyError`` the following fallbacks are attempted in order:

        1. treat ``key`` as a partial date string (``_get_string_slice``);
        2. if ``key`` is a ``time``, take all rows at that time of day;
        3. convert ``key`` to a ``Timestamp`` and ask the index engine.

        Raises
        ------
        KeyError
            With the converted ``Timestamp`` as argument when the engine
            lookup fails as well.
        """
        try:
            return Index.get_value(self, series, key)
        except KeyError:
            # Everything below runs inside this handler, i.e. only after the
            # generic lookup has failed.

            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                # Not usable as a string slice; fall through to the next
                # strategy instead of failing here.
                pass

            if isinstance(key, time):
                locs = self.indexer_at_time(key)
                return series.take(locs)

            # Strings are interpreted in the index's own timezone; other keys
            # are converted without passing a timezone.
            if isinstance(key, basestring):
                stamp = Timestamp(key, tz=self.tz)
            else:
                stamp = Timestamp(key)
            try:
                return self._engine.get_value(series, stamp)
            except KeyError:
                # Re-raise with the normalized key rather than the engine's.
                raise KeyError(stamp)
0
def _generate_regular_range(start, end, periods, offset):
    """
    Build the values of a regularly spaced date range.

    Parameters
    ----------
    start, end : datetime-like or None
    periods : int or None
    offset : DateOffset
        ``Tick`` offsets are generated arithmetically with ``np.arange``;
        any other offset is expanded through ``generate_range`` and converted
        with ``tools.to_datetime``.

    Returns
    -------
    For ``Tick`` offsets an ndarray viewed as ``_NS_DTYPE``; otherwise
    whatever ``tools.to_datetime`` returns for the generated dates.
    """
    if not isinstance(offset, Tick):
        # generate_range is fed plain datetimes rather than Timestamps.
        if isinstance(start, Timestamp):
            start = start.to_pydatetime()
        if isinstance(end, Timestamp):
            end = end.to_pydatetime()

        xdr = generate_range(start=start, end=end,
                             periods=periods, offset=offset)
        return tools.to_datetime(list(xdr))

    step = offset.nanos
    if periods is None:
        lo = Timestamp(start).value
        hi = Timestamp(end).value
        # np.arange excludes its stop value, so push the stop up to the next
        # multiple of the step strictly above ``end``.
        hi += step - hi % step
    elif start is not None:
        lo = Timestamp(start).value
        hi = lo + periods * step
    elif end is not None:
        hi = Timestamp(end).value + step
        lo = hi - periods * step
    else:
        raise NotImplementedError

    return np.arange(lo, hi, step, dtype=np.int64).view(_NS_DTYPE)
Exemple #5
0
    def test_usecols_with_parse_dates_and_usecol_names(self):
        """Combined date columns parse the same for any ``usecols`` order."""
        # See gh-9755
        s = """0,1,20140101,0900,4
        0,1,20140102,1000,4"""
        names = list('acd')
        parse_dates = [[1, 2]]

        expected = DataFrame(
            {
                'a': [0, 0],
                'c_d': [
                    Timestamp('2014-01-01 09:00:00'),
                    Timestamp('2014-01-02 10:00:00'),
                ],
            },
            columns=['c_d', 'a'])

        # Same selection, once in file order and once shuffled.
        for usecols in ([0, 2, 3], [3, 0, 2]):
            df = self.read_csv(StringIO(s),
                               names=names,
                               usecols=usecols,
                               parse_dates=parse_dates)
            tm.assert_frame_equal(df, expected)
Exemple #6
0
def _get_range_edges(first, last, offset, closed='left', base=0):
    """
    Compute the outer bin edges for data spanning ``first`` to ``last``.

    Parameters
    ----------
    first, last : Timestamp
    offset : str or DateOffset
        Strings are converted with ``to_offset``.
    closed : str, default 'left'
        'left' rolls ``first`` back onto the offset; any other value steps one
        full offset before ``first``.
    base : int, default 0
        Forwarded unchanged to ``_adjust_dates_anchored``.

    Returns
    -------
    tuple
        ``(first, last)``.
    """
    if isinstance(offset, compat.string_types):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        offset_is_day = isinstance(offset, Day)
        nanos_per_day = _delta_to_nanoseconds(timedelta(1))

        # #1165: every non-Day tick is anchored; a Day tick only when it
        # evenly divides one day.
        if not offset_is_day or nanos_per_day % offset.nanos == 0:
            return _adjust_dates_anchored(first, last, offset,
                                          closed=closed, base=base)
    else:
        # hack! non-tick offsets work from midnight-normalized endpoints
        first = first.normalize()
        last = last.normalize()

    first = Timestamp(offset.rollback(first) if closed == 'left'
                      else first - offset)
    last = Timestamp(last + offset)

    return first, last
Exemple #7
0
    def test_index_groupby(self):
        """
        ``Index.groupby`` maps each non-missing group key to the list of
        index elements at the matching positions, for several index types.
        """
        int_idx = Index(range(6))
        float_idx = Index(np.arange(0, 0.6, 0.1))
        obj_idx = Index('A B C D E F'.split())
        dt_idx = pd.date_range('2013-01-01', freq='M', periods=6)

        for idx in [int_idx, float_idx, obj_idx, dt_idx]:
            # Numeric grouper: positions 2 and 3 are NaN and are expected to
            # be absent from the result.
            to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
            self.assertEqual(idx.groupby(to_groupby), {
                1.0: [idx[0], idx[5]],
                2.0: [idx[1], idx[4]]
            })

            # Datetime grouper: positions 2 and 3 are NaT and are likewise
            # expected to be absent from the result.
            to_groupby = Index([
                datetime(2011, 11, 1),
                datetime(2011, 12, 1), pd.NaT, pd.NaT,
                datetime(2011, 12, 1),
                datetime(2011, 11, 1)
            ],
                               tz='UTC').values

            # Expected keys are produced by the same conversion helper; the
            # result is indexed as ex_keys[0][i] below.
            ex_keys = pd.tslib.datetime_to_datetime64(
                np.array([Timestamp('2011-11-01'),
                          Timestamp('2011-12-01')]))
            expected = {
                ex_keys[0][0]: [idx[0], idx[5]],
                ex_keys[0][1]: [idx[1], idx[4]]
            }
            self.assertEqual(idx.groupby(to_groupby), expected)
Exemple #8
0
def _get_range_edges(axis, offset, closed='left', base=0):
    """
    Compute the outer ``(first, last)`` edges used to bin ``axis``.

    Parameters
    ----------
    axis : indexable of datetime-like
        Only ``axis[0]`` and ``axis[-1]`` are read.
    offset : str or DateOffset
        Strings are converted with ``to_offset``.
    closed : str, default 'left'
        'left' rolls the first value back onto the offset; any other value
        steps one full offset before it.
    base : int, default 0
        Forwarded unchanged to ``_adjust_dates_anchored``.

    Returns
    -------
    tuple
        ``(first, last)``.
    """
    if isinstance(offset, str):
        offset = to_offset(offset)

    tick_offset = isinstance(offset, Tick)
    if tick_offset:
        nanos_per_day = _delta_to_nanoseconds(timedelta(1))
        # #1165: ticks that evenly divide a day are anchored to the day start.
        if nanos_per_day % offset.nanos == 0:
            return _adjust_dates_anchored(axis[0], axis[-1], offset,
                                          closed=closed, base=base)

    first = axis[0]
    last = axis[-1]
    if not tick_offset:
        # hack! non-tick offsets work from normalized endpoints
        first = tools.normalize_date(first)
        last = tools.normalize_date(last)

    first = Timestamp(offset.rollback(first) if closed == 'left'
                      else first - offset)
    last = Timestamp(last + offset)

    return first, last
Exemple #9
0
    def _cached_range(cls,
                      start=None,
                      end=None,
                      periods=None,
                      offset=None,
                      name=None):
        """
        Return a slice of the per-offset cached date range.

        The full range between ``_CACHE_START`` and ``_CACHE_END`` is built
        once per offset and stored in ``_daterange_cache``; later calls only
        slice it.

        Parameters
        ----------
        start, end : datetime-like or None
            Converted to ``Timestamp`` when given.
        periods : int or None
            Used arithmetically to locate the missing bound whenever
            ``start`` or ``end`` is None.
        offset : DateOffset
            Required; also the cache key.
        name : object, optional
            Assigned to the returned slice.

        Raises
        ------
        Exception
            If ``offset`` is None.
        AssertionError
            If the bound that must be present is not a ``Timestamp``
            (for example when both ``start`` and ``end`` are None).
        """
        if start is not None:
            start = Timestamp(start)
        if end is not None:
            end = Timestamp(end)

        if offset is None:
            raise Exception('Must provide a DateOffset!')

        drc = _daterange_cache
        if offset not in _daterange_cache:
            # Cache miss: materialize the whole cacheable range for this
            # offset and remember it.
            xdr = generate_range(offset=offset,
                                 start=_CACHE_START,
                                 end=_CACHE_END)

            arr = tools.to_datetime(list(xdr), box=False)

            cachedRange = arr.view(DatetimeIndex)
            cachedRange.offset = offset
            cachedRange.tz = None
            cachedRange.name = None
            drc[offset] = cachedRange
        else:
            cachedRange = drc[offset]

        if start is None:
            # Only ``end`` given: roll it back onto the offset and count
            # ``periods`` entries backwards from it (end inclusive).
            assert (isinstance(end, Timestamp))

            end = offset.rollback(end)

            endLoc = cachedRange.get_loc(end) + 1
            startLoc = endLoc - periods
        elif end is None:
            # Only ``start`` given: roll it forward onto the offset and take
            # ``periods`` entries from there.
            assert (isinstance(start, Timestamp))
            start = offset.rollforward(start)

            startLoc = cachedRange.get_loc(start)
            endLoc = startLoc + periods
        else:
            # Both bounds given: snap each inwards onto the offset; the
            # ``+ 1`` makes the end bound inclusive.
            if not offset.onOffset(start):
                start = offset.rollforward(start)

            if not offset.onOffset(end):
                end = offset.rollback(end)

            startLoc = cachedRange.get_loc(start)
            endLoc = cachedRange.get_loc(end) + 1

        indexSlice = cachedRange[startLoc:endLoc]
        indexSlice.name = name
        indexSlice.offset = offset

        return indexSlice
Exemple #10
0
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    # First and last offsets should be calculated from the start day to fix an
    # error cause by resampling across multiple days when a one day period is
    # not a multiple of the frequency.
    #
    # See https://github.com/pandas-dev/pandas/issues/8683

    # 14682 - Since we need to drop the TZ information to perform
    # the adjustment in the presence of a DST change,
    # save TZ Info and the DST state of the first and last parameters
    # so that we can accurately rebuild them at the end.
    first_tzinfo = first.tzinfo
    last_tzinfo = last.tzinfo
    first_dst = bool(first.dst())
    last_dst = bool(last.dst())

    first = first.tz_localize(None)
    last = last.tz_localize(None)

    start_day_nanos = first.normalize().value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst),
            Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
Exemple #11
0
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    """
    Snap ``first`` and ``last`` outwards onto a grid of ``offset.nanos``
    steps anchored at midnight of ``first``'s day (shifted by ``base``).

    Parameters
    ----------
    first, last : Timestamp
    offset : Tick-like
        Must expose ``n`` and ``nanos``.
    closed : str, default 'right'
        Any value other than 'right' is treated as 'left'.
    base : int, default 0

    Returns
    -------
    tuple of (Timestamp, Timestamp)
        Both localized with the tzinfo of ``first``.
    """
    # First and last offsets should be calculated from the start day to fix an
    # error cause by resampling across multiple days when a one day period is
    # not a multiple of the frequency.
    #
    # See https://github.com/pydata/pandas/issues/8683

    # The arithmetic below is done on tz-naive values; the timezone is
    # re-applied on return.
    first_tzinfo = first.tzinfo
    first = first.tz_localize(None)
    last = last.tz_localize(None)
    start_day_nanos = first.normalize().value

    # Shift the grid origin by ``base`` sub-steps of the offset.
    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    # Distance of each endpoint past the previous grid line.
    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            # on the grid: step back one full offset
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # on the grid: step forward one full offset
            lresult = last.value + offset.nanos


    # NOTE(review): ``last`` is re-localized with ``first``'s tzinfo, not its
    # own -- presumably both come from the same index; confirm with callers.
    return (Timestamp(fresult).tz_localize(first_tzinfo),
            Timestamp(lresult).tz_localize(first_tzinfo))
Exemple #12
0
    def _generate(cls, start, end, periods, name, offset,
                  tz=None, normalize=False):
        """
        Build an index of type ``cls`` from range parameters.

        Parameters
        ----------
        start, end : datetime-like or None
            Converted to ``Timestamp`` when given.
        periods : int or None
        name : object
            Assigned to the resulting index.
        offset : DateOffset
        tz : timezone, optional
            When set (after ``tools._figure_out_timezone``), the generated
            values are treated as local times and converted to UTC.
        normalize : bool, default False
            Pass ``start``/``end`` through ``normalize_date`` first.

        Raises
        ------
        ValueError
            If ``Timestamp(start)`` or ``Timestamp(end)`` does not yield a
            ``Timestamp`` instance.
        """
        # Tracks whether every given endpoint sits exactly on midnight.
        _normalized = True

        if start is not None:
            start = Timestamp(start)
            if not isinstance(start, Timestamp):
                raise ValueError('Failed to convert %s to timestamp'
                                 % start)

            if normalize:
                start = normalize_date(start)
                _normalized = True
            else:
                _normalized = _normalized and start.time() == _midnight

        if end is not None:
            end = Timestamp(end)
            if not isinstance(end, Timestamp):
                raise ValueError('Failed to convert %s to timestamp'
                                 % end)

            if normalize:
                end = normalize_date(end)
                _normalized = True
            else:
                _normalized = _normalized and end.time() == _midnight

        start, end, tz = tools._figure_out_timezone(start, end, tz)

        # Use the cached range only when the offset allows caching, the
        # endpoints are normalized if the offset's cache requires that, and
        # the helper accepts the endpoints as being in the cache range.
        if (offset._should_cache() and
            not (offset._normalize_cache and not _normalized) and
            _naive_in_cache_range(start, end)):
            index = cls._cached_range(start, end, periods=periods,
                                      offset=offset, name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)

        if tz is not None:
            # Convert local to UTC
            ints = index.view('i8')
            lib.tz_localize_check(ints, tz)
            index = lib.tz_convert(ints, tz, _utc())
            index = index.view('M8[ns]')

        # Re-view the raw values as ``cls`` and attach the metadata.
        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index
Exemple #13
0
    def to_timestamp(self, freq=None, how='S'):
        """
        Return the Timestamp at the start/end of the period.

        Parameters
        ----------
        freq : string or DateOffset, optional
            Target frequency.  When None the period's own frequency is used
            and no frequency conversion is performed.
        how : str, default 'S' (start)
            'S', 'E'. Can be aliased as case insensitive
            'Start', 'Finish', 'Begin', 'End'

        Returns
        -------
        Timestamp
            Carries the offset derived from the period's frequency rule.

        Raises
        ------
        ValueError
            If the frequency multiple is not 1.
        """
        convert = freq is not None
        base, mult = _gfc(freq if convert else self.freq)
        period = self.asfreq(freq, how) if convert else self

        if mult != 1:
            raise ValueError('Only mult == 1 supported')

        dt64 = plib.period_ordinal_to_dt64(period.ordinal, base)
        rule = _period_rule_to_timestamp_rule(period.freq, how=how)
        return Timestamp(dt64, offset=to_offset(rule))
Exemple #14
0
 def rollforward(self, dt):
     """
     Return ``dt`` unchanged when it is on offset, else ``dt`` plus one offset.

     ``np.datetime64`` input is converted to ``Timestamp`` first.
     """
     stamp = Timestamp(dt) if isinstance(dt, np.datetime64) else dt
     if self.onOffset(stamp):
         return stamp
     # One step of the same offset class, built with the same keyword config.
     return stamp + self.__class__(1, **self.kwds)
Exemple #15
0
    def assembleTimeSeries(self, context, attrname):
        """
        The function assembles a time series of all the past values
        of an attribute inside a context of the camera alerts associated
        with a locus aggregated alert.

        The attribute name is passed to the database driver as a bound
        parameter instead of being spliced into the SQL text, so quotes or
        SQL fragments in ``attrname`` cannot alter the query.

        :param string context: the name of the context (not used by the
            query itself)
        :param string attrname: the name of the attribute

        :return: a time series of values. Here the values are tuples of magnitude and passbands.
        :rtype: :py:class:`pandas.Series` indexed by
            :py:class:`pandas.Timestamp`
        """
        ## Connect to mysql database.
        conn = GetDBConn()
        cur = conn.cursor()

        # ``%s`` is the placeholder style of the MySQL DB-API drivers; the
        # driver performs the quoting and escaping of both values.
        query = ('select ComputedAt, Value from AttributeValue '
                 'where ContainerID=%s and ContainerType="E" and AttrName=%s')
        try:
            cur.execute(query, (self.container_id, attrname))
            rows = cur.fetchall()
        finally:
            # Always release the cursor, even when the query fails.
            # NOTE(review): the connection is left open on purpose because
            # GetDBConn() may hand out a shared connection -- confirm.
            cur.close()

        timestamps = [Timestamp(row[0]) for row in rows]
        values = [row[1] for row in rows]

        # Return a Pandas TimeSeries
        return pd.Series(values, index=timestamps)
Exemple #16
0
    def to_timestamp(self, freq=None, how='start'):
        """
        Return the Timestamp representation of the Period at the target
        frequency at the specified end (how) of the Period.

        Parameters
        ----------
        freq : string or DateOffset, optional
            Target frequency.  When None it is chosen by
            ``get_to_timestamp_base`` from the period's own frequency.
        how : str, default 'start'
            Normalized with ``_validate_end_alias``.
            'S', 'E'. Can be aliased as case insensitive
            'Start', 'Finish', 'Begin', 'End'

        Returns
        -------
        Timestamp
        """
        how = _validate_end_alias(how)

        if freq is None:
            own_base, _ = _gfc(self.freq)
            freq = _freq_mod.get_to_timestamp_base(own_base)

        target_base, _ = _gfc(freq)
        converted = self.asfreq(freq, how)

        return Timestamp(
            tslib.period_ordinal_to_dt64(converted.ordinal, target_base))
Exemple #17
0
    def to_timestamp(self, freq=None, how='S'):
        """
        Return the Timestamp at the start/end of the period.

        Parameters
        ----------
        freq : string or DateOffset, optional
            Target frequency.  When None, a start-anchored conversion uses
            the frequency picked by ``get_to_timestamp_base``; otherwise the
            period is used as is.
        how : str, default 'S' (start)
            'S', 'E'. Can be aliased as case insensitive
            'Start', 'Finish', 'Begin', 'End'

        Returns
        -------
        Timestamp
        """
        if freq is not None:
            base, _ = _gfc(freq)
            period = self.asfreq(freq, how)
        else:
            base, _ = _gfc(self.freq)
            # The alias is only normalized on this (implicit-frequency) path.
            how = _validate_end_alias(how)
            period = self
            if how == 'S':
                base = _freq_mod.get_to_timestamp_base(base)
                period = self.asfreq(_freq_mod._get_freq_str(base), how)

        return Timestamp(plib.period_ordinal_to_dt64(period.ordinal, base))
Exemple #18
0
    def test_multiple_date_col_timestamp_parse(self):
        data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
        result = self.read_csv(StringIO(data), sep=',', header=None,
                               parse_dates=[[0, 1]], date_parser=Timestamp)

        ex_val = Timestamp('05/31/2012 15:30:00.029')
        self.assertEqual(result['0_1'][0], ex_val)
Exemple #19
0
    def _generate(cls, start, end, periods, name, offset,
                  tz=None, normalize=False):
        """
        Build an index of type ``cls`` from range parameters.

        Parameters
        ----------
        start, end : datetime-like or None
            Converted to ``Timestamp`` when given.
        periods : int or None
        name : object
            Assigned to the resulting index.
        offset : DateOffset
        tz : timezone, optional
            When set (after ``tools._figure_out_timezone``), the generated
            values are treated as local times and converted to UTC.
        normalize : bool, default False
            Pass ``start``/``end`` through ``normalize_date`` first.

        Raises
        ------
        ValueError
            If fewer than two of ``start``, ``end`` and ``periods`` are given.
        """
        # True while every endpoint seen so far sits exactly on midnight.
        on_midnight = True

        if start is not None:
            start = Timestamp(start)
            if normalize:
                start = normalize_date(start)
            else:
                on_midnight = start.time() == _midnight

        if end is not None:
            end = Timestamp(end)
            if normalize:
                end = normalize_date(end)
                on_midnight = True
            else:
                on_midnight = on_midnight and end.time() == _midnight

        start, end, tz = tools._figure_out_timezone(start, end, tz)

        if com._count_not_none(start, end, periods) < 2:
            raise ValueError('Must specify two of start, end, or periods')

        # The cached range is only usable when the offset allows caching, the
        # endpoints are normalized if the offset's cache demands it, and the
        # helper accepts the endpoints as being in the cache range.
        use_cache = (offset._should_cache()
                     and (not offset._normalize_cache or on_midnight)
                     and _naive_in_cache_range(start, end))
        if use_cache:
            index = cls._cached_range(start, end, periods=periods,
                                      offset=offset, name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)

        if tz is not None:
            # Convert local to UTC
            raw = index.view('i8', type=np.ndarray)
            index = lib.tz_localize_to_utc(raw, tz).view(_NS_DTYPE)

        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index
Exemple #20
0
    def test_datetime_nanosecond_unit(self):
        """An encoded datetime decodes to the same integer as ``Timestamp.value``."""
        from datetime import datetime
        from pandas.lib import Timestamp

        now = datetime.now()
        expected = Timestamp(now).value

        decoded = ujson.decode(ujson.encode(now))
        self.assert_(decoded == expected)
Exemple #21
0
 def max(self, axis=None):
     """
     Overridden ndarray.max to return a Timestamp.

     A monotonic index simply returns its last element; otherwise the
     largest int64 value is boxed as a Timestamp in the index's timezone.
     ``axis`` is accepted but not used.
     """
     if not self.is_monotonic:
         return Timestamp(self.asi8.max(), tz=self.tz)
     return self[-1]
Exemple #22
0
def _to_m8(key):
    '''
    Timestamp-like => dt64

    Anything that is not already a ``datetime`` (strings included) is first
    converted with ``Timestamp``.
    '''
    stamp = key if isinstance(key, datetime) else Timestamp(key)
    nanos = lib.pydt_to_i8(stamp)
    return np.int64(nanos).view(_NS_DTYPE)
Exemple #23
0
def _dt_box_array(arr, offset=None, tz=None):
    if arr is None:
        return arr

    if not isinstance(arr, np.ndarray):
        return arr

    boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz)
    return lib.map_infer(arr, boxfunc)
Exemple #24
0
 def min(self, axis=None):
     """
     Overridden ndarray.min to return a Timestamp.

     A monotonic index simply returns its first element; otherwise the
     smallest int64 value is boxed as a Timestamp in the index's timezone.
     ``axis`` is accepted but not used.
     """
     if not self.is_monotonic:
         return Timestamp(self.asi8.min(), tz=self.tz)
     return self[0]
Exemple #25
0
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    """
    Snap ``first`` and ``last`` outwards onto a grid of ``offset.nanos``
    steps.

    Each endpoint is measured from the normalized start of its *own* day
    (shifted by ``base``).

    Parameters
    ----------
    first, last : Timestamp
    offset : Tick-like
        Must expose ``n`` and ``nanos``.
    closed : str, default 'right'
        Any value other than 'right' is treated as 'left'.
    base : int, default 0

    Returns
    -------
    tuple of (Timestamp, Timestamp)
        Built with the ``tz`` of ``first`` and ``last`` respectively.
    """
    from pandas.tseries.tools import normalize_date

    first_origin = Timestamp(normalize_date(first)).value
    last_origin = Timestamp(normalize_date(last)).value

    step = offset.nanos
    shift = (base % offset.n) * step // offset.n
    first_origin += shift
    last_origin += shift

    # Distance of each endpoint past the previous grid line of its own day.
    head_rem = (first.value - first_origin) % step
    tail_rem = (last.value - last_origin) % step
    right_closed = closed == 'right'

    if head_rem > 0:
        # roll back to the previous grid line
        lower = first.value - head_rem
    elif right_closed:
        # on the grid: a right-closed bin needs one more step to the left
        lower = first.value - step
    else:
        # start of the road
        lower = first.value

    if tail_rem > 0:
        # roll forward to the next grid line
        upper = last.value + (step - tail_rem)
    elif right_closed:
        # already the end of the road
        upper = last.value
    else:
        upper = last.value + step

    return (Timestamp(lower, tz=first.tz),
            Timestamp(upper, tz=last.tz))
Exemple #26
0
    def _partial_date_slice(self, reso, parsed):
        if not self.is_monotonic:
            raise TimeSeriesError(
                'Partial indexing only valid for ordered time'
                ' series')

        if reso == 'year':
            t1 = Timestamp(datetime(parsed.year, 1, 1))
            t2 = Timestamp(datetime(parsed.year, 12, 31))
        elif reso == 'month':
            d = lib.monthrange(parsed.year, parsed.month)[1]
            t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
            t2 = Timestamp(datetime(parsed.year, parsed.month, d))
        elif reso == 'quarter':
            qe = (((parsed.month - 1) + 2) % 12) + 1  # two months ahead
            d = lib.monthrange(parsed.year, qe)[1]  # at end of month
            t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
            t2 = Timestamp(datetime(parsed.year, qe, d))
        else:
            raise KeyError

        stamps = self.asi8
        left = stamps.searchsorted(t1.value, side='left')
        right = stamps.searchsorted(t2.value, side='right')
        return slice(left, right)
Exemple #27
0
def mavg(x,y, window):
    """
    Compute a moving average with a one-standard-deviation band.

    Returns ``(x, mean, lower, upper)`` where ``mean`` is the rolling mean of
    ``y`` over ``window`` and ``lower``/``upper`` are lists holding the mean
    minus/plus the rolling standard deviation.  Date-like ``x`` values are
    passed through their ordinal and come back as Timestamps.
    """
    x, y = (plot_friendly(v) for v in (x, y))
    dates_on_x = _isdate(x[0])
    if dates_on_x:
        x = np.array([d.toordinal() for d in x])

    center = pd.rolling_mean(y, window)
    spread = pd.rolling_std(y, window)
    lower = (center - spread).tolist()
    upper = (center + spread).tolist()

    if dates_on_x:
        x = [Timestamp.fromordinal(int(o)) for o in x]
    return (x, center, lower, upper)
Exemple #28
0
def _get_range_edges(axis, offset, closed='left', base=0):
    """
    Compute the outer ``(first, last)`` edges used to bin ``axis``.

    Parameters
    ----------
    axis : indexable of datetime-like
        Only ``axis[0]`` and ``axis[-1]`` are read.
    offset : basestring or DateOffset
        Strings are converted with ``to_offset``.
    closed : str, default 'left'
        'left' rolls the first value back onto the offset; any other value
        steps one full offset before it.
    base : int, default 0
        Forwarded unchanged to ``_adjust_dates_anchored``.

    Returns
    -------
    tuple
        ``(first, last)``.
    """
    if isinstance(offset, basestring):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        nanos_per_day = _delta_to_nanoseconds(timedelta(1))
        # #1165: ticks that evenly divide a day are anchored to the day start.
        if nanos_per_day % offset.nanos == 0:
            return _adjust_dates_anchored(axis[0], axis[-1], offset,
                                          closed=closed, base=base)

    first = Timestamp(offset.rollback(axis[0]) if closed == 'left'
                      else axis[0] - offset)
    last = Timestamp(axis[-1] + offset)

    return first, last
Exemple #29
0
def mavg(x, y, window):
    """
    Compute a moving average with a one-standard-deviation band.

    Returns ``(x, mean, lower, upper)`` where ``mean`` is the rolling mean of
    ``y`` over ``window`` and ``lower``/``upper`` are the mean minus/plus the
    rolling standard deviation.  Date-like ``x`` values are passed through
    their ordinal and come back as Timestamps.
    """
    x, y = (_plot_friendly(v) for v in (x, y))
    dates_on_x = _isdate(x.iloc[0])
    if dates_on_x:
        x = np.array([d.toordinal() for d in x])

    center = pd.rolling_mean(y, window)
    spread = pd.rolling_std(y, window)
    lower = center - spread
    upper = center + spread

    if dates_on_x:
        x = [Timestamp.fromordinal(int(o)) for o in x]
    return (x, center, lower, upper)
Exemple #30
0
    def _generate(cls,
                  start,
                  end,
                  periods,
                  name,
                  offset,
                  tz=None,
                  normalize=False):
        """
        Build an index of type ``cls`` from range parameters.

        Parameters
        ----------
        start, end : datetime-like or None
            Converted to ``Timestamp`` when given.
        periods : int or None
        name : object
            Assigned to the resulting index.
        offset : DateOffset
        tz : timezone, optional
            When set (after ``tools._figure_out_timezone``), the generated
            values are treated as local times and converted to UTC.
        normalize : bool, default False
            Pass ``start``/``end`` through ``normalize_date`` first.

        Raises
        ------
        ValueError
            If ``Timestamp(start)`` or ``Timestamp(end)`` does not yield a
            ``Timestamp`` instance.
        """
        # Tracks whether every given endpoint sits exactly on midnight.
        _normalized = True

        if start is not None:
            start = Timestamp(start)
            if not isinstance(start, Timestamp):
                raise ValueError('Failed to convert %s to timestamp' % start)

            if normalize:
                start = normalize_date(start)
                _normalized = True
            else:
                _normalized = _normalized and start.time() == _midnight

        if end is not None:
            end = Timestamp(end)
            if not isinstance(end, Timestamp):
                raise ValueError('Failed to convert %s to timestamp' % end)

            if normalize:
                end = normalize_date(end)
                _normalized = True
            else:
                _normalized = _normalized and end.time() == _midnight

        start, end, tz = tools._figure_out_timezone(start, end, tz)

        # Use the cached range only when the offset allows caching, the
        # endpoints are normalized if the offset's cache requires that, and
        # the helper accepts the endpoints as being in the cache range.
        if (offset._should_cache()
                and not (offset._normalize_cache and not _normalized)
                and _naive_in_cache_range(start, end)):
            index = cls._cached_range(start,
                                      end,
                                      periods=periods,
                                      offset=offset,
                                      name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)

        if tz is not None:
            # Convert local to UTC
            ints = index.view('i8')
            index = lib.tz_localize_to_utc(ints, tz)
            index = index.view(_NS_DTYPE)

        # Re-view the raw values as ``cls`` and attach the metadata.
        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index
Exemple #31
0
    def test_index_groupby(self):
        """
        ``Index.groupby`` maps each non-missing group key to the sub-index at
        the matching positions, for several index types.
        """
        candidates = [
            Index(range(6)),
            Index(np.arange(0, 0.6, 0.1)),
            Index('A B C D E F'.split()),
            pd.date_range('2013-01-01', freq='M', periods=6),
        ]

        for idx in candidates:
            # Numeric grouper: the NaN positions (2 and 3) must not appear.
            by_number = np.array([1, 2, np.nan, np.nan, 2, 1])
            tm.assert_dict_equal(
                idx.groupby(by_number),
                {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]})

            # Datetime grouper: the NaT positions (2 and 3) must not appear.
            by_date = Index([
                datetime(2011, 11, 1),
                datetime(2011, 12, 1), pd.NaT, pd.NaT,
                datetime(2011, 12, 1),
                datetime(2011, 11, 1)
            ], tz='UTC').values

            november = Timestamp('2011-11-01')
            december = Timestamp('2011-12-01')
            expected = {november: idx[[0, 5]], december: idx[[1, 4]]}
            tm.assert_dict_equal(idx.groupby(by_date), expected)
Exemple #32
0
    def test_astype(self):
        """``astype`` to object / int keeps values and maps missing entries."""
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        # object: the three missing entries all come back as NaT
        as_object = idx.astype(object)
        tm.assert_index_equal(
            as_object,
            Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object))

        # int: missing entries are expected as the minimum int64 value
        as_int = idx.astype(int)
        tm.assert_index_equal(
            as_int,
            Int64Index([1463356800000000000] +
                       [-9223372036854775808] * 3,
                       dtype=np.int64))

        # 'i8' on a regular range must match the raw asi8 values
        rng = date_range('1/1/2000', periods=10)
        self.assert_numpy_array_equal(rng.astype('i8'), rng.asi8)
Exemple #33
0
def lm(x, y, alpha=ALPHA):
    """
    Fit an OLS model with statsmodels and return the fitted line with the
    confidence band of the mean.

    Parameters
    ----------
    x : Series-like
        Regressor; ``x.iloc[0]`` is inspected to detect dates.  Date-like
        values are fitted on their ordinal and returned as Timestamps.
    y : array-like
        Response.
    alpha : float, default ALPHA
        Passed to ``summary_table``.

    Returns
    -------
    tuple
        ``(x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)``.
    """
    x_is_date = _isdate(x.iloc[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)
    # A concrete list (not a lazy ``map`` object) is handed to DataFrame so
    # the column labels are the same on Python 2 and Python 3.
    columns = [_snakify(label) for label in summary_names]
    df = pd.DataFrame(summary_values, columns=columns)
    # TODO: indexing w/ data frame is messing everything up
    fittedvalues = df['predicted_value'].values
    predict_mean_ci_low = df['mean_ci_95%_low'].values
    predict_mean_ci_upp = df['mean_ci_95%_upp'].values

    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)
Exemple #34
0
def lowess(x, y, span=SPAN):
    """Return y-values estimated using the lowess function in statsmodels.

    For more see
        statsmodels.nonparametric.smoothers_lowess.lowess

    Both inputs are first passed through ``plot_friendly``. If ``_isdate``
    reports the first x value as a date, x is converted with ``toordinal()``
    for smoothing and mapped back to ``Timestamp`` objects before returning.

    Returns
    -------
    tuple
        ``(x, y, y1, y2)`` where ``y`` is a Series holding the second column
        of the smoother output, and ``y1`` / ``y2`` are lists holding ``y``
        shifted by ``np.std(y)`` times the lower / upper end of
        ``stats.t.interval(span, len(x), loc=0, scale=2)``.
    """
    x, y = (plot_friendly(values) for values in (x, y))
    dates_on_x = _isdate(x[0])
    if dates_on_x:
        x = np.array([stamp.toordinal() for stamp in x])

    smoothed = smlowess(np.array(y), np.array(x), frac=span)
    # column 0 carries the x values, column 1 the smoothed estimates
    x = pd.Series(smoothed[:, 0])
    y = pd.Series(smoothed[:, 1])

    t_low, t_high = stats.t.interval(span, len(x), loc=0, scale=2)
    spread = np.std(y)
    band_low = pd.Series(t_low * spread + y).tolist()
    band_high = pd.Series(t_high * spread + y).tolist()

    if dates_on_x:
        x = [Timestamp.fromordinal(int(ordinal)) for ordinal in x]
    return (x, y, band_low, band_high)
Exemple #35
0
def lm(x, y, alpha=ALPHA):
    """Fit an OLS model with statsmodels and return the fitted line.

    Parameters
    ----------
    x : pandas.Series
        Predictor values; the first element is read through ``x.iloc[0]``.
        When ``_isdate`` reports that element as a date, every element is
        converted with ``toordinal()`` before fitting.
    y : array-like
        Response values, handed to ``sm.OLS`` unchanged.
    alpha : float
        Forwarded to ``summary_table``.

    Returns
    -------
    tuple
        ``(x, fitted values, mean CI lower bound, mean CI upper bound)``.
        ``x`` is a list of ``Timestamp`` objects for date input, otherwise
        the ``x`` that was passed in. The other three are numpy arrays.
    """
    x_is_date = _isdate(x.iloc[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])

    design = sm.add_constant(x)
    fit = sm.OLS(y, design).fit()
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)

    # A concrete list of labels avoids relying on pandas accepting a lazy
    # ``map`` iterator for ``columns`` on Python 3.
    df = pd.DataFrame(summary_values,
                      columns=[_snakify(label) for label in summary_names])

    # The prediction-interval values were never returned, so they are no
    # longer computed or looked up.
    fittedvalues = df['predicted_value'].values
    predict_mean_ci_low = df['mean_ci_95%_low'].values
    predict_mean_ci_upp = df['mean_ci_95%_upp'].values

    if x_is_date:
        # Map the ordinals used for fitting back to timestamps.
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)
Exemple #36
0
def lm(x, y, alpha=ALPHA):
    """Fit an OLS model with statsmodels and return the fitted line.

    Both inputs are first passed through ``plot_friendly``. When ``_isdate``
    reports the first x value as a date, x is converted with ``toordinal()``
    for fitting and mapped back to ``Timestamp`` objects before returning.

    Parameters
    ----------
    x, y : sequence
        Predictor and response values.
    alpha : float
        Forwarded to ``summary_table``.

    Returns
    -------
    tuple
        ``(x, fitted values, mean CI lower bound, mean CI upper bound)``;
        the last three are plain lists.
    """
    x, y = map(plot_friendly, [x, y])
    x_is_date = _isdate(x[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])

    fit = sm.OLS(y, sm.add_constant(x)).fit()
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)

    # Materialize the labels as a list so the column index does not depend
    # on how pandas treats a lazy ``map`` object on Python 3.
    columns = [snakify(label) for label in summary_names]
    df = pd.DataFrame(summary_values, columns=columns)

    # Only the returned columns are read; the standard-error and
    # prediction-interval lookups were unused and have been dropped.
    fittedvalues = df['predicted_value'].tolist()
    predict_mean_ci_low = df['mean_ci_95%_low'].tolist()
    predict_mean_ci_upp = df['mean_ci_95%_upp'].tolist()

    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)
Exemple #37
0
    def _generate(cls, start, end, periods, name, offset,
                  tz=None, normalize=False):
        """
        Build an index of type ``cls`` from two of start / end / periods.

        Parameters
        ----------
        start, end : optional
            Range endpoints; each one that is not None is converted with
            ``Timestamp(...)``.
        periods : optional
            Forwarded to the range generators.
        name : object
            Stored on the result as ``index.name``.
        offset : object
            Frequency of the range; stored on the result as ``index.offset``.
        tz : optional
            Time zone. When omitted, the zone inferred from ``start`` /
            ``end`` by ``tools._infer_tzinfo`` is used.
        normalize : bool, default False
            When true, the given endpoints are passed through
            ``normalize_date``.

        Returns
        -------
        The generated values viewed as ``cls`` with ``name``, ``offset`` and
        ``tz`` set.

        Raises
        ------
        ValueError
            If fewer than two of ``start``, ``end``, ``periods`` are given.
        AssertionError
            If both an explicit ``tz`` and an inferred one exist and differ.
        """
        if com._count_not_none(start, end, periods) < 2:
            raise ValueError('Must specify two of start, end, or periods')

        # Tracks whether every supplied endpoint sits exactly on midnight;
        # consulted below together with offset._normalize_cache.
        _normalized = True

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        inferred_tz = tools._infer_tzinfo(start, end)

        # An explicit tz must agree with the inferred one; otherwise the
        # inferred zone (if any) becomes the working tz.
        if tz is not None and inferred_tz is not None:
            assert(inferred_tz == tz)
        elif inferred_tz is not None:
            tz = inferred_tz

        tz = tools._maybe_get_tz(tz)

        if start is not None:
            if normalize:
                start = normalize_date(start)
                _normalized = True
            else:
                _normalized = _normalized and start.time() == _midnight

        if end is not None:
            if normalize:
                end = normalize_date(end)
                _normalized = True
            else:
                _normalized = _normalized and end.time() == _midnight

        # Offsets exposing a ``delta`` attribute (other than Day): the
        # endpoints are localized first and the range is generated from the
        # localized values.
        # NOTE(review): presumably these are the fixed-width Tick offsets --
        # confirm against the offsets module.
        if hasattr(offset, 'delta') and offset != offsets.Day():
            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is None:
                    start = start.tz_localize(tz)

                if end is not None and end.tz is None:
                    end = end.tz_localize(tz)

            # Relies on Timestamp truthiness; None endpoints skip this.
            # If only one endpoint carries a zone, give it to the other.
            if start and end:
                if start.tz is None and end.tz is not None:
                    start = start.tz_localize(end.tz)

                if end.tz is None and start.tz is not None:
                    end = end.tz_localize(start.tz)

            # Use the cached range only when the offset allows caching, the
            # normalization requirement (if any) is met, and the endpoints
            # pass _naive_in_cache_range.
            if (offset._should_cache() and
                not (offset._normalize_cache and not _normalized) and
                _naive_in_cache_range(start, end)):
                index = cls._cached_range(start, end, periods=periods,
                                          offset=offset, name=name)
            else:
                index = _generate_regular_range(start, end, periods, offset)

        else:
            # All other offsets: the range is generated from zone-less
            # endpoints and the values are localized afterwards.

            if inferred_tz is None and tz is not None:
                # naive dates
                # NOTE(review): inferred_tz is None here, so start/end
                # presumably carry no tz and these two strips look
                # unreachable -- confirm against tools._infer_tzinfo.
                if start is not None and start.tz is not None:
                    start = start.replace(tzinfo=None)

                if end is not None and end.tz is not None:
                    end = end.replace(tzinfo=None)

            # If exactly one endpoint carries a zone, strip it so both
            # endpoints are naive.
            if start and end:
                if start.tz is None and end.tz is not None:
                    end = end.replace(tzinfo=None)

                if end.tz is None and start.tz is not None:
                    start = start.replace(tzinfo=None)

            # Same cache decision as in the branch above.
            if (offset._should_cache() and
                not (offset._normalize_cache and not _normalized) and
                _naive_in_cache_range(start, end)):
                index = cls._cached_range(start, end, periods=periods,
                                          offset=offset, name=name)
            else:
                index = _generate_regular_range(start, end, periods, offset)

            # The generated index has no tz of its own: convert its int64
            # values with lib.tz_localize_to_utc and view them as _NS_DTYPE.
            if tz is not None and getattr(index, 'tz', None) is None:
                index = lib.tz_localize_to_utc(com._ensure_int64(index), tz)
                index = index.view(_NS_DTYPE)

        # Re-view as the requested class and attach the metadata.
        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index
Exemple #38
0
 def onOffset(cls, dt):
     """Return True when ``dt.weekday()`` is below 5.

     Under the ``datetime`` convention that means Monday through Friday.
     A ``np.datetime64`` value is wrapped in ``Timestamp`` first so that it
     has a ``weekday()`` method.
     """
     moment = Timestamp(dt) if isinstance(dt, np.datetime64) else dt
     return moment.weekday() < 5
Exemple #39
0
    def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False):
        """
        Build an index of type ``cls`` from two of start / end / periods.

        Endpoints that are given are converted with ``Timestamp(...)``. The
        working time zone is the explicit ``tz`` or, failing that, the one
        inferred from the endpoints by ``tools._infer_tzinfo``; naive
        endpoints are localized to it. With ``normalize`` set, the endpoints
        are passed through ``normalize_date``.

        Returns the generated values viewed as ``cls`` with ``name``,
        ``offset`` and ``tz`` attached.

        Raises ``ValueError`` when fewer than two of ``start``, ``end``,
        ``periods`` are given, and ``AssertionError`` when an explicit ``tz``
        differs from the inferred one.
        """
        start = None if start is None else Timestamp(start)
        end = None if end is None else Timestamp(end)

        inferred_tz = tools._infer_tzinfo(start, end)

        if inferred_tz is not None:
            # the endpoints carry a zone: adopt it, or check the explicit one
            if tz is None:
                tz = inferred_tz
            else:
                assert inferred_tz == tz
        elif tz is not None:
            # naive endpoints: attach the requested zone
            if start is not None and start.tz is None:
                start = start.tz_localize(tz)
            if end is not None and end.tz is None:
                end = end.tz_localize(tz)

        if start and end:
            # when only one endpoint has a zone, share it with the other
            if start.tz is None and end.tz is not None:
                start = start.tz_localize(end.tz)
            elif end.tz is None and start.tz is not None:
                end = end.tz_localize(start.tz)

        if normalize:
            if start is not None:
                start = normalize_date(start)
            if end is not None:
                end = normalize_date(end)
            on_midnight = True
        else:
            # every supplied endpoint has to sit exactly on midnight
            on_midnight = all(
                point.time() == _midnight
                for point in (start, end)
                if point is not None
            )

        tz = tools._maybe_get_tz(tz)

        if com._count_not_none(start, end, periods) < 2:
            raise ValueError("Must specify two of start, end, or periods")

        cacheable = (
            offset._should_cache()
            and (not offset._normalize_cache or on_midnight)
            and _naive_in_cache_range(start, end)
        )
        if cacheable:
            index = cls._cached_range(start, end, periods=periods, offset=offset, name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)

        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index