def _get_range_edges(axis, begin, end, offset, closed='left', base=0):
    if isinstance(offset, basestring):
        offset = to_offset(offset)

    if not isinstance(offset, DateOffset):
        raise ValueError("Rule not a recognized offset")

    if isinstance(offset, Tick):
        day_nanos = _delta_to_nanoseconds(timedelta(1))
        # #1165
        if ((day_nanos % offset.nanos) == 0 and begin is None
                and end is None):
            return _adjust_dates_anchored(axis[0], axis[-1], offset,
                                          closed=closed, base=base)

    if begin is None:
        if closed == 'left':
            first = Timestamp(offset.rollback(axis[0]))
        else:
            first = Timestamp(axis[0] - offset)
    else:
        first = Timestamp(offset.rollback(begin))

    if end is None:
        last = Timestamp(axis[-1] + offset)
    else:
        last = Timestamp(offset.rollforward(end))

    return first, last

def _generate_regular_range(start, end, periods, offset):
    if com._count_not_none(start, end, periods) < 2:
        raise ValueError('Must specify two of start, end, or periods')

    if isinstance(offset, Tick):
        stride = offset.nanos
        if periods is None:
            b = Timestamp(start).value
            e = Timestamp(end).value
            e += stride - e % stride
        elif start is not None:
            b = Timestamp(start).value
            e = b + periods * stride
        elif end is not None:
            e = Timestamp(end).value + stride
            b = e - periods * stride
        else:
            raise NotImplementedError

        data = np.arange(b, e, stride, dtype=np.int64)
        data = data.view(_NS_DTYPE)
    else:
        xdr = generate_range(start=start, end=end, periods=periods,
                             offset=offset)
        data = np.array(list(xdr), dtype=_NS_DTYPE)

    return data

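# Not from the original source: a minimal sketch of the Tick fast path
# above, assuming only public numpy/pandas APIs. Nanosecond int64 values
# spaced by `stride` view directly as a fixed-frequency datetime64 range.
import numpy as np
import pandas as pd

stride = 3600 * 10 ** 9                       # one hour in nanoseconds
b = pd.Timestamp('2024-01-01').value          # epoch nanoseconds
hours = np.arange(b, b + 4 * stride, stride, dtype=np.int64).view('M8[ns]')
# -> ['2024-01-01T00:00', '01:00', '02:00', '03:00'] as datetime64[ns]
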
def get_value(self, series, key):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing.
    """
    try:
        return Index.get_value(self, series, key)
    except KeyError:
        try:
            loc = self._get_string_slice(key)
            return series[loc]
        except (TypeError, ValueError, KeyError):
            pass

        if isinstance(key, time):
            locs = self.indexer_at_time(key)
            return series.take(locs)

        if isinstance(key, basestring):
            stamp = Timestamp(key, tz=self.tz)
        else:
            stamp = Timestamp(key)

        try:
            return self._engine.get_value(series, stamp)
        except KeyError:
            raise KeyError(stamp)

def _generate_regular_range(start, end, periods, offset):
    if isinstance(offset, Tick):
        stride = offset.nanos
        if periods is None:
            b = Timestamp(start).value
            e = Timestamp(end).value
            e += stride - e % stride
        elif start is not None:
            b = Timestamp(start).value
            e = b + periods * stride
        elif end is not None:
            e = Timestamp(end).value + stride
            b = e - periods * stride
        else:
            raise NotImplementedError

        data = np.arange(b, e, stride, dtype=np.int64)
        data = data.view(_NS_DTYPE)
    else:
        if isinstance(start, Timestamp):
            start = start.to_pydatetime()

        if isinstance(end, Timestamp):
            end = end.to_pydatetime()

        xdr = generate_range(start=start, end=end, periods=periods,
                             offset=offset)
        dates = list(xdr)
        # utc = len(dates) > 0 and dates[0].tzinfo is not None
        data = tools.to_datetime(dates)

    return data

def test_usecols_with_parse_dates_and_usecol_names(self):
    # See gh-9755
    s = """0,1,20140101,0900,4
0,1,20140102,1000,4"""
    parse_dates = [[1, 2]]
    names = list('acd')

    cols = {
        'a': [0, 0],
        'c_d': [
            Timestamp('2014-01-01 09:00:00'),
            Timestamp('2014-01-02 10:00:00')
        ]
    }
    expected = DataFrame(cols, columns=['c_d', 'a'])

    df = self.read_csv(StringIO(s), names=names,
                       usecols=[0, 2, 3],
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)

    df = self.read_csv(StringIO(s), names=names,
                       usecols=[3, 0, 2],
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)

def _get_range_edges(first, last, offset, closed='left', base=0):
    if isinstance(offset, compat.string_types):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        is_day = isinstance(offset, Day)
        day_nanos = _delta_to_nanoseconds(timedelta(1))

        # #1165
        if (is_day and day_nanos % offset.nanos == 0) or not is_day:
            return _adjust_dates_anchored(first, last, offset,
                                          closed=closed, base=base)

    if not isinstance(offset, Tick):  # and first.time() != last.time():
        # hack!
        first = first.normalize()
        last = last.normalize()

    if closed == 'left':
        first = Timestamp(offset.rollback(first))
    else:
        first = Timestamp(first - offset)

    last = Timestamp(last + offset)

    return first, last

def test_index_groupby(self):
    int_idx = Index(range(6))
    float_idx = Index(np.arange(0, 0.6, 0.1))
    obj_idx = Index('A B C D E F'.split())
    dt_idx = pd.date_range('2013-01-01', freq='M', periods=6)

    for idx in [int_idx, float_idx, obj_idx, dt_idx]:
        to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
        self.assertEqual(idx.groupby(to_groupby), {
            1.0: [idx[0], idx[5]],
            2.0: [idx[1], idx[4]]
        })

        to_groupby = Index([
            datetime(2011, 11, 1),
            datetime(2011, 12, 1),
            pd.NaT,
            pd.NaT,
            datetime(2011, 12, 1),
            datetime(2011, 11, 1)
        ], tz='UTC').values

        ex_keys = pd.tslib.datetime_to_datetime64(
            np.array([Timestamp('2011-11-01'), Timestamp('2011-12-01')]))
        expected = {
            ex_keys[0][0]: [idx[0], idx[5]],
            ex_keys[0][1]: [idx[1], idx[4]]
        }
        self.assertEqual(idx.groupby(to_groupby), expected)

def _get_range_edges(axis, offset, closed='left', base=0):
    if isinstance(offset, str):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        day_nanos = _delta_to_nanoseconds(timedelta(1))
        # #1165
        if (day_nanos % offset.nanos) == 0:
            return _adjust_dates_anchored(axis[0], axis[-1], offset,
                                          closed=closed, base=base)

    first, last = axis[0], axis[-1]
    if not isinstance(offset, Tick):  # and first.time() != last.time():
        # hack!
        first = tools.normalize_date(first)
        last = tools.normalize_date(last)

    if closed == 'left':
        first = Timestamp(offset.rollback(first))
    else:
        first = Timestamp(first - offset)

    last = Timestamp(last + offset)

    return first, last

def _cached_range(cls, start=None, end=None, periods=None, offset=None,
                  name=None):
    if start is not None:
        start = Timestamp(start)
    if end is not None:
        end = Timestamp(end)

    if offset is None:
        raise Exception('Must provide a DateOffset!')

    drc = _daterange_cache
    if offset not in _daterange_cache:
        xdr = generate_range(offset=offset, start=_CACHE_START,
                             end=_CACHE_END)

        arr = tools.to_datetime(list(xdr), box=False)

        cachedRange = arr.view(DatetimeIndex)
        cachedRange.offset = offset
        cachedRange.tz = None
        cachedRange.name = None
        drc[offset] = cachedRange
    else:
        cachedRange = drc[offset]

    if start is None:
        assert isinstance(end, Timestamp)

        end = offset.rollback(end)

        endLoc = cachedRange.get_loc(end) + 1
        startLoc = endLoc - periods
    elif end is None:
        assert isinstance(start, Timestamp)

        start = offset.rollforward(start)

        startLoc = cachedRange.get_loc(start)
        endLoc = startLoc + periods
    else:
        if not offset.onOffset(start):
            start = offset.rollforward(start)

        if not offset.onOffset(end):
            end = offset.rollback(end)

        startLoc = cachedRange.get_loc(start)
        endLoc = cachedRange.get_loc(end) + 1

    indexSlice = cachedRange[startLoc:endLoc]
    indexSlice.name = name
    indexSlice.offset = offset

    return indexSlice

def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    # First and last offsets should be calculated from the start day to fix
    # an error caused by resampling across multiple days when a one day
    # period is not a multiple of the frequency.
    #
    # See https://github.com/pandas-dev/pandas/issues/8683

    # 14682 - Since we need to drop the TZ information to perform
    # the adjustment in the presence of a DST change,
    # save TZ Info and the DST state of the first and last parameters
    # so that we can accurately rebuild them at the end.
    first_tzinfo = first.tzinfo
    last_tzinfo = last.tzinfo
    first_dst = bool(first.dst())
    last_dst = bool(last.dst())

    first = first.tz_localize(None)
    last = last.tz_localize(None)

    start_day_nanos = first.normalize().value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult).tz_localize(first_tzinfo,
                                           ambiguous=first_dst),
            Timestamp(lresult).tz_localize(last_tzinfo,
                                           ambiguous=last_dst))

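# Not from the original source: a worked sketch of the anchoring
# arithmetic above, assuming a naive timestamp and a 4-hour Tick offset
# (no timezone or DST to restore in this simplified case).
import pandas as pd
from pandas.tseries.frequencies import to_offset

offset = to_offset('4H')                      # a Tick; exposes .nanos
first = pd.Timestamp('2000-01-01 06:30')
start_day_nanos = first.normalize().value     # midnight of the first day
foffset = (first.value - start_day_nanos) % offset.nanos
# foffset is 2.5 hours, so the left-closed edge rolls back to 04:00
edge = pd.Timestamp(first.value - foffset)    # 2000-01-01 04:00:00
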
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    # from pandas.tseries.tools import normalize_date

    # First and last offsets should be calculated from the start day to fix
    # an error caused by resampling across multiple days when a one day
    # period is not a multiple of the frequency.
    #
    # See https://github.com/pydata/pandas/issues/8683

    first_tzinfo = first.tzinfo
    first = first.tz_localize(None)
    last = last.tz_localize(None)

    start_day_nanos = first.normalize().value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    # return (Timestamp(fresult, tz=first.tz),
    #         Timestamp(lresult, tz=last.tz))
    return (Timestamp(fresult).tz_localize(first_tzinfo),
            Timestamp(lresult).tz_localize(first_tzinfo))

def _generate(cls, start, end, periods, name, offset,
              tz=None, normalize=False):
    _normalized = True

    if start is not None:
        start = Timestamp(start)
        if not isinstance(start, Timestamp):
            raise ValueError('Failed to convert %s to timestamp' % start)

        if normalize:
            start = normalize_date(start)
            _normalized = True
        else:
            _normalized = _normalized and start.time() == _midnight

    if end is not None:
        end = Timestamp(end)
        if not isinstance(end, Timestamp):
            raise ValueError('Failed to convert %s to timestamp' % end)

        if normalize:
            end = normalize_date(end)
            _normalized = True
        else:
            _normalized = _normalized and end.time() == _midnight

    start, end, tz = tools._figure_out_timezone(start, end, tz)

    if (offset._should_cache() and
            not (offset._normalize_cache and not _normalized) and
            _naive_in_cache_range(start, end)):
        index = cls._cached_range(start, end, periods=periods,
                                  offset=offset, name=name)
    else:
        index = _generate_regular_range(start, end, periods, offset)

    if tz is not None:
        # Convert local to UTC
        ints = index.view('i8')
        lib.tz_localize_check(ints, tz)
        index = lib.tz_convert(ints, tz, _utc())
        index = index.view('M8[ns]')

    index = index.view(cls)
    index.name = name
    index.offset = offset
    index.tz = tz

    return index

def to_timestamp(self, freq=None, how='S'):
    """
    Return the Timestamp at the start/end of the period.

    Parameters
    ----------
    freq : string or DateOffset, default frequency of PeriodIndex
        Target frequency
    how : str, default 'S' (start)
        'S', 'E'. Can be aliased as case insensitive
        'Start', 'Finish', 'Begin', 'End'

    Returns
    -------
    Timestamp
    """
    # how = _validate_end_alias(how)
    if freq is None:
        base, mult = _gfc(self.freq)
        new_val = self
    else:
        base, mult = _gfc(freq)
        new_val = self.asfreq(freq, how)

    if mult != 1:
        raise ValueError('Only mult == 1 supported')

    dt64 = plib.period_ordinal_to_dt64(new_val.ordinal, base)
    ts_freq = _period_rule_to_timestamp_rule(new_val.freq, how=how)
    return Timestamp(dt64, offset=to_offset(ts_freq))

def rollforward(self, dt):
    """Roll provided date forward to the next offset, only if not already
    on offset."""
    if isinstance(dt, np.datetime64):
        dt = Timestamp(dt)
    if not self.onOffset(dt):
        dt = dt + self.__class__(1, **self.kwds)
    return dt

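# Not from the original source: a usage sketch with a public offset.
# BDay.rollforward moves an off-offset Saturday to the next business day
# and leaves dates that are already on offset untouched.
import pandas as pd

bday = pd.offsets.BDay()
bday.rollforward(pd.Timestamp('2024-01-06'))  # Sat -> Mon 2024-01-08
bday.rollforward(pd.Timestamp('2024-01-08'))  # already on offset, unchanged
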
def assembleTimeSeries(self, context, attrname):
    """
    Assemble a time series of all the past values of an attribute, inside
    a context, of the camera alerts associated with a locus-aggregated
    alert.

    :param string context: the name of the context
    :param string attrname: the name of the attribute
    :return: a time series of values. Here the values are tuples of
        magnitude and passband.
    :rtype: :py:class:`pandas.TimeSeries` of (uncertainFloat, string)
    """
    # Connect to the MySQL database.
    conn = GetDBConn()
    cur = conn.cursor()

    query = """select ComputedAt, Value from AttributeValue
    where ContainerID={0} and ContainerType="E"
    and AttrName="{1}" """.format(self.container_id, attrname)
    cur.execute(query)
    rows = cur.fetchall()

    timestamps = []
    values = []
    for row in rows:
        timestamps.append(Timestamp(row[0]))
        values.append(row[1])

    # Return a pandas Series of values indexed by timestamp.
    return pd.Series(values, index=timestamps)

def to_timestamp(self, freq=None, how='start'):
    """
    Return the Timestamp representation of the Period at the target
    frequency at the specified end (how) of the Period.

    Parameters
    ----------
    freq : string or DateOffset, default is 'D' if self.freq is week or
        longer and 'S' otherwise
        Target frequency
    how : str, default 'S' (start)
        'S', 'E'. Can be aliased as case insensitive
        'Start', 'Finish', 'Begin', 'End'

    Returns
    -------
    Timestamp
    """
    how = _validate_end_alias(how)

    if freq is None:
        base, mult = _gfc(self.freq)
        freq = _freq_mod.get_to_timestamp_base(base)

    base, mult = _gfc(freq)
    val = self.asfreq(freq, how)

    dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base)
    return Timestamp(dt64)

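# Not from the original source: a sketch of the public API this method
# backs; the exact 'end' timestamp has varied across pandas versions.
import pandas as pd

p = pd.Period('2012Q1', freq='Q')
p.to_timestamp(how='start')   # Timestamp('2012-01-01 00:00:00')
p.to_timestamp(how='end')     # last instant of the quarter in recent pandas
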
def to_timestamp(self, freq=None, how='S'):
    """
    Return the Timestamp at the start/end of the period.

    Parameters
    ----------
    freq : string or DateOffset, default frequency of PeriodIndex
        Target frequency
    how : str, default 'S' (start)
        'S', 'E'. Can be aliased as case insensitive
        'Start', 'Finish', 'Begin', 'End'

    Returns
    -------
    Timestamp
    """
    if freq is None:
        base, mult = _gfc(self.freq)
        how = _validate_end_alias(how)
        if how == 'S':
            base = _freq_mod.get_to_timestamp_base(base)
            freq = _freq_mod._get_freq_str(base)
            new_val = self.asfreq(freq, how)
        else:
            new_val = self
    else:
        base, mult = _gfc(freq)
        new_val = self.asfreq(freq, how)

    dt64 = plib.period_ordinal_to_dt64(new_val.ordinal, base)
    return Timestamp(dt64)

def test_multiple_date_col_timestamp_parse(self):
    data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
    result = self.read_csv(StringIO(data), sep=',', header=None,
                           parse_dates=[[0, 1]], date_parser=Timestamp)

    ex_val = Timestamp('05/31/2012 15:30:00.029')
    self.assertEqual(result['0_1'][0], ex_val)

def _generate(cls, start, end, periods, name, offset,
              tz=None, normalize=False):
    _normalized = True

    if start is not None:
        start = Timestamp(start)
        if normalize:
            start = normalize_date(start)
            _normalized = True
        else:
            _normalized = _normalized and start.time() == _midnight

    if end is not None:
        end = Timestamp(end)
        if normalize:
            end = normalize_date(end)
            _normalized = True
        else:
            _normalized = _normalized and end.time() == _midnight

    start, end, tz = tools._figure_out_timezone(start, end, tz)

    if com._count_not_none(start, end, periods) < 2:
        raise ValueError('Must specify two of start, end, or periods')

    if (offset._should_cache() and
            not (offset._normalize_cache and not _normalized) and
            _naive_in_cache_range(start, end)):
        index = cls._cached_range(start, end, periods=periods,
                                  offset=offset, name=name)
    else:
        index = _generate_regular_range(start, end, periods, offset)

    if tz is not None:
        # Convert local to UTC
        ints = index.view('i8', type=np.ndarray)
        index = lib.tz_localize_to_utc(ints, tz)
        index = index.view(_NS_DTYPE)

    index = index.view(cls)
    index.name = name
    index.offset = offset
    index.tz = tz

    return index

def test_datetime_nanosecond_unit(self):
    from datetime import datetime
    from pandas.lib import Timestamp

    val = datetime.now()
    stamp = Timestamp(val)

    roundtrip = ujson.decode(ujson.encode(val))
    self.assert_(roundtrip == stamp.value)

def max(self, axis=None):
    """Overridden ndarray.max to return a Timestamp."""
    if self.is_monotonic:
        return self[-1]
    else:
        max_stamp = self.asi8.max()
        return Timestamp(max_stamp, tz=self.tz)

def _to_m8(key):
    """Timestamp-like => dt64"""
    if not isinstance(key, datetime):
        # this also converts strings
        key = Timestamp(key)

    return np.int64(lib.pydt_to_i8(key)).view(_NS_DTYPE)

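# Not from the original source: the int64 -> datetime64[ns] view trick the
# helper relies on, shown in isolation with public numpy/pandas APIs.
import numpy as np
import pandas as pd

i8 = np.int64(pd.Timestamp('2012-01-01').value)   # epoch nanoseconds
i8.view('M8[ns]')   # numpy.datetime64('2012-01-01T00:00:00.000000000')
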
def _dt_box_array(arr, offset=None, tz=None):
    if arr is None:
        return arr

    if not isinstance(arr, np.ndarray):
        return arr

    boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz)
    return lib.map_infer(arr, boxfunc)

def min(self, axis=None):
    """Overridden ndarray.min to return a Timestamp."""
    if self.is_monotonic:
        return self[0]
    else:
        min_stamp = self.asi8.min()
        return Timestamp(min_stamp, tz=self.tz)

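# Not from the original source: a public-API sketch of the fast path.
# A monotonic DatetimeIndex can return its endpoints without a full scan.
import pandas as pd

idx = pd.date_range('2000-01-01', periods=10)
idx.min()   # Timestamp('2000-01-01'), equal to idx[0] on the fast path
idx.max()   # Timestamp('2000-01-10'), equal to idx[-1]
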
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    from pandas.tseries.tools import normalize_date

    start_day_nanos = Timestamp(normalize_date(first)).value
    last_day_nanos = Timestamp(normalize_date(last)).value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos
    last_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - last_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult, tz=first.tz),
            Timestamp(lresult, tz=last.tz))

def _partial_date_slice(self, reso, parsed):
    if not self.is_monotonic:
        raise TimeSeriesError('Partial indexing only valid for ordered '
                              'time series')

    if reso == 'year':
        t1 = Timestamp(datetime(parsed.year, 1, 1))
        t2 = Timestamp(datetime(parsed.year, 12, 31))
    elif reso == 'month':
        d = lib.monthrange(parsed.year, parsed.month)[1]
        t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
        t2 = Timestamp(datetime(parsed.year, parsed.month, d))
    elif reso == 'quarter':
        qe = (((parsed.month - 1) + 2) % 12) + 1  # two months ahead
        d = lib.monthrange(parsed.year, qe)[1]    # at end of month
        t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
        t2 = Timestamp(datetime(parsed.year, qe, d))
    else:
        raise KeyError

    stamps = self.asi8
    left = stamps.searchsorted(t1.value, side='left')
    right = stamps.searchsorted(t2.value, side='right')
    return slice(left, right)

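# Not from the original source: the public behaviour this helper
# implements, partial string indexing on a monotonic DatetimeIndex.
import pandas as pd

s = pd.Series(range(365), index=pd.date_range('2011-01-01', periods=365))
s['2011-02']   # every row in February 2011
s['2011']      # the whole year
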
def mavg(x, y, window):
    "compute moving average"
    x, y = map(plot_friendly, [x, y])
    x_is_date = _isdate(x[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])

    std_err = pd.rolling_std(y, window)
    y = pd.rolling_mean(y, window)
    y1 = y - std_err
    y2 = y + std_err
    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, y, y1.tolist(), y2.tolist())

def _get_range_edges(axis, offset, closed='left', base=0):
    if isinstance(offset, basestring):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        day_nanos = _delta_to_nanoseconds(timedelta(1))
        # #1165
        if (day_nanos % offset.nanos) == 0:
            return _adjust_dates_anchored(axis[0], axis[-1], offset,
                                          closed=closed, base=base)

    if closed == 'left':
        first = Timestamp(offset.rollback(axis[0]))
    else:
        first = Timestamp(axis[0] - offset)

    last = Timestamp(axis[-1] + offset)

    return first, last

def mavg(x, y, window):
    "compute moving average"
    x, y = map(_plot_friendly, [x, y])
    x_is_date = _isdate(x.iloc[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])

    std_err = pd.rolling_std(y, window)
    y = pd.rolling_mean(y, window)
    y1 = y - std_err
    y2 = y + std_err
    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, y, y1, y2)

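# Not from the original source: pd.rolling_mean/pd.rolling_std were
# removed in later pandas releases; a sketch of the modern equivalent.
import pandas as pd

y = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
y.rolling(3).mean()   # replaces pd.rolling_mean(y, 3)
y.rolling(3).std()    # replaces pd.rolling_std(y, 3)
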
def _generate(cls, start, end, periods, name, offset,
              tz=None, normalize=False):
    _normalized = True

    if start is not None:
        start = Timestamp(start)
        if not isinstance(start, Timestamp):
            raise ValueError('Failed to convert %s to timestamp' % start)

        if normalize:
            start = normalize_date(start)
            _normalized = True
        else:
            _normalized = _normalized and start.time() == _midnight

    if end is not None:
        end = Timestamp(end)
        if not isinstance(end, Timestamp):
            raise ValueError('Failed to convert %s to timestamp' % end)

        if normalize:
            end = normalize_date(end)
            _normalized = True
        else:
            _normalized = _normalized and end.time() == _midnight

    start, end, tz = tools._figure_out_timezone(start, end, tz)

    if (offset._should_cache() and
            not (offset._normalize_cache and not _normalized) and
            _naive_in_cache_range(start, end)):
        index = cls._cached_range(start, end, periods=periods,
                                  offset=offset, name=name)
    else:
        index = _generate_regular_range(start, end, periods, offset)

    if tz is not None:
        # Convert local to UTC
        ints = index.view('i8')
        index = lib.tz_localize_to_utc(ints, tz)
        index = index.view(_NS_DTYPE)

    index = index.view(cls)
    index.name = name
    index.offset = offset
    index.tz = tz

    return index

def test_index_groupby(self):
    int_idx = Index(range(6))
    float_idx = Index(np.arange(0, 0.6, 0.1))
    obj_idx = Index('A B C D E F'.split())
    dt_idx = pd.date_range('2013-01-01', freq='M', periods=6)

    for idx in [int_idx, float_idx, obj_idx, dt_idx]:
        to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
        tm.assert_dict_equal(idx.groupby(to_groupby), {
            1.0: idx[[0, 5]],
            2.0: idx[[1, 4]]
        })

        to_groupby = Index([
            datetime(2011, 11, 1),
            datetime(2011, 12, 1),
            pd.NaT,
            pd.NaT,
            datetime(2011, 12, 1),
            datetime(2011, 11, 1)
        ], tz='UTC').values

        ex_keys = [Timestamp('2011-11-01'), Timestamp('2011-12-01')]
        expected = {ex_keys[0]: idx[[0, 5]],
                    ex_keys[1]: idx[[1, 4]]}
        tm.assert_dict_equal(idx.groupby(to_groupby), expected)

def test_astype(self):
    # GH 13149, GH 13209
    idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

    result = idx.astype(object)
    expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object)
    tm.assert_index_equal(result, expected)

    result = idx.astype(int)
    expected = Int64Index([1463356800000000000] +
                          [-9223372036854775808] * 3, dtype=np.int64)
    tm.assert_index_equal(result, expected)

    rng = date_range('1/1/2000', periods=10)
    result = rng.astype('i8')
    self.assert_numpy_array_equal(result, rng.asi8)

def lm(x, y, alpha=ALPHA):
    "fits an OLS from statsmodels. returns tuple."
    x_is_date = _isdate(x.iloc[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    prstd, iv_l, iv_u = wls_prediction_std(fit)
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)

    df = pd.DataFrame(summary_values, columns=map(_snakify, summary_names))
    # TODO: indexing w/ data frame is messing everything up
    fittedvalues = df['predicted_value'].values
    predict_mean_ci_low = df['mean_ci_95%_low'].values
    predict_mean_ci_upp = df['mean_ci_95%_upp'].values
    predict_ci_low = df['predict_ci_95%_low'].values
    predict_ci_upp = df['predict_ci_95%_upp'].values

    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)

def lowess(x, y, span=SPAN):
    """
    Returns y-values estimated using the lowess function in statsmodels.

    For more, see statsmodels.nonparametric.smoothers_lowess.lowess.
    """
    x, y = map(plot_friendly, [x, y])
    x_is_date = _isdate(x[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])
    result = smlowess(np.array(y), np.array(x), frac=span)
    x = pd.Series(result[::, 0])
    y = pd.Series(result[::, 1])
    lower, upper = stats.t.interval(span, len(x), loc=0, scale=2)
    std = np.std(y)
    y1 = pd.Series(lower * std + y).tolist()
    y2 = pd.Series(upper * std + y).tolist()
    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, y, y1, y2)

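# Not from the original source: a minimal sketch of the statsmodels call
# the function above wraps, using only public statsmodels/numpy APIs.
import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess as smlowess

x = np.linspace(0, 10, 50)
y = np.sin(x) + np.random.normal(scale=0.2, size=50)
smoothed = smlowess(y, x, frac=2. / 3)   # N x 2 array: sorted x, fitted y
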
def lm(x, y, alpha=ALPHA):
    "fits an OLS from statsmodels. returns tuple."
    x, y = map(plot_friendly, [x, y])
    x_is_date = _isdate(x[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    prstd, iv_l, iv_u = wls_prediction_std(fit)
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)
    df = pd.DataFrame(summary_values, columns=map(snakify, summary_names))
    fittedvalues = df['predicted_value']
    predict_mean_se = df['std_error_mean_predict']
    predict_mean_ci_low = df['mean_ci_95%_low']
    predict_mean_ci_upp = df['mean_ci_95%_upp']
    predict_ci_low = df['predict_ci_95%_low']
    predict_ci_upp = df['predict_ci_95%_upp']
    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues.tolist(), predict_mean_ci_low.tolist(),
            predict_mean_ci_upp.tolist())

def _generate(cls, start, end, periods, name, offset,
              tz=None, normalize=False):
    if com._count_not_none(start, end, periods) < 2:
        raise ValueError('Must specify two of start, end, or periods')

    _normalized = True

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    inferred_tz = tools._infer_tzinfo(start, end)

    if tz is not None and inferred_tz is not None:
        assert inferred_tz == tz
    elif inferred_tz is not None:
        tz = inferred_tz

    tz = tools._maybe_get_tz(tz)

    if start is not None:
        if normalize:
            start = normalize_date(start)
            _normalized = True
        else:
            _normalized = _normalized and start.time() == _midnight

    if end is not None:
        if normalize:
            end = normalize_date(end)
            _normalized = True
        else:
            _normalized = _normalized and end.time() == _midnight

    if hasattr(offset, 'delta') and offset != offsets.Day():
        if inferred_tz is None and tz is not None:
            # naive dates
            if start is not None and start.tz is None:
                start = start.tz_localize(tz)

            if end is not None and end.tz is None:
                end = end.tz_localize(tz)

        if start and end:
            if start.tz is None and end.tz is not None:
                start = start.tz_localize(end.tz)

            if end.tz is None and start.tz is not None:
                end = end.tz_localize(start.tz)

        if (offset._should_cache() and
                not (offset._normalize_cache and not _normalized) and
                _naive_in_cache_range(start, end)):
            index = cls._cached_range(start, end, periods=periods,
                                      offset=offset, name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)
    else:
        if inferred_tz is None and tz is not None:
            # naive dates
            if start is not None and start.tz is not None:
                start = start.replace(tzinfo=None)

            if end is not None and end.tz is not None:
                end = end.replace(tzinfo=None)

        if start and end:
            if start.tz is None and end.tz is not None:
                end = end.replace(tzinfo=None)

            if end.tz is None and start.tz is not None:
                start = start.replace(tzinfo=None)

        if (offset._should_cache() and
                not (offset._normalize_cache and not _normalized) and
                _naive_in_cache_range(start, end)):
            index = cls._cached_range(start, end, periods=periods,
                                      offset=offset, name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)

    if tz is not None and getattr(index, 'tz', None) is None:
        index = lib.tz_localize_to_utc(com._ensure_int64(index), tz)
        index = index.view(_NS_DTYPE)

    index = index.view(cls)
    index.name = name
    index.offset = offset
    index.tz = tz

    return index

def onOffset(cls, dt):
    if isinstance(dt, np.datetime64):
        dt = Timestamp(dt)
    return dt.weekday() < 5

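# Not from the original source: a quick check of the weekday test above.
# weekday() is 0-4 for Mon-Fri, so Saturday (5) is not a business day.
import numpy as np
import pandas as pd

pd.Timestamp(np.datetime64('2024-01-06')).weekday()   # 5 -> off offset
pd.Timestamp('2024-01-08').weekday()                  # 0 -> on offset
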
def _generate(cls, start, end, periods, name, offset,
              tz=None, normalize=False):
    _normalized = True

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    inferred_tz = tools._infer_tzinfo(start, end)

    if tz is not None and inferred_tz is not None:
        assert inferred_tz == tz
    elif inferred_tz is not None:
        tz = inferred_tz

    if inferred_tz is None and tz is not None:
        # naive dates
        if start is not None and start.tz is None:
            start = start.tz_localize(tz)

        if end is not None and end.tz is None:
            end = end.tz_localize(tz)
    elif inferred_tz is not None:
        pass

    if start and end:
        if start.tz is None and end.tz is not None:
            start = start.tz_localize(end.tz)

        if end.tz is None and start.tz is not None:
            end = end.tz_localize(start.tz)

    if start is not None:
        if normalize:
            start = normalize_date(start)
            _normalized = True
        else:
            _normalized = _normalized and start.time() == _midnight

    if end is not None:
        if normalize:
            end = normalize_date(end)
            _normalized = True
        else:
            _normalized = _normalized and end.time() == _midnight

    tz = tools._maybe_get_tz(tz)

    if com._count_not_none(start, end, periods) < 2:
        raise ValueError("Must specify two of start, end, or periods")

    if (offset._should_cache() and
            not (offset._normalize_cache and not _normalized) and
            _naive_in_cache_range(start, end)):
        index = cls._cached_range(start, end, periods=periods,
                                  offset=offset, name=name)
    else:
        index = _generate_regular_range(start, end, periods, offset)

    index = index.view(cls)
    index.name = name
    index.offset = offset
    index.tz = tz

    return index