def mktimerange(
        time_resolution: TimeResolution,
        date_from: Union[datetime, str],
        date_to: Union[datetime, str] = None) -> Tuple[Timestamp, Timestamp]:
    """
    Compute appropriate time ranges for monthly and annual time resolutions.

    The bounds are widened to the enclosing period: "date_from" is floored to
    the begin of its month/year and "date_to" is ceiled to the end of its
    month/year.

    Args:
        time_resolution: time resolution as enumeration
        date_from: datetime string or object
        date_to: datetime string or object; defaults to ``date_from``

    Returns:
        Tuple of two Timestamps: "date_from" and "date_to"

    Raises:
        NotImplementedError: for any resolution other than annual/monthly.
    """
    if date_to is None:
        date_to = date_from

    # Select the pair of anchor offsets for the requested resolution.
    if time_resolution == TimeResolution.ANNUAL:
        floor_offset, ceil_offset = YearBegin(1), YearEnd(1)
    elif time_resolution == TimeResolution.MONTHLY:
        floor_offset, ceil_offset = MonthBegin(1), MonthEnd(1)
    else:
        raise NotImplementedError(
            "mktimerange only implemented for annual and monthly time ranges")

    date_from = pd.to_datetime(date_from) - floor_offset
    date_to = pd.to_datetime(date_to) + ceil_offset
    return date_from, date_to
def ls_returns_sheet(self, cur_day=None):
    """Build a one-row DataFrame of long-short returns over trailing windows.

    Args:
        cur_day: reference trading day; when None, the latest trade day is
            looked up from ``data_source.trade_calendar``.

    Returns:
        One-row ``pd.DataFrame`` whose columns are (in order): daily, week-,
        month-, quarter-to-date, last 6 months, year-to-date, last 2 years,
        annualized return, and return since inception.
    """
    if cur_day is None:
        # No day given: fall back to the most recent trading day.
        cur_day = pd.to_datetime(
            data_source.trade_calendar.get_latest_trade_days(
                datetime.today().strftime("%Y%m%d")))
    else:
        cur_day = pd.to_datetime(cur_day)
    # Window start dates, one per output column (except the last two
    # pre-computed attributes appended below).
    dates = [
        cur_day,
        cur_day.to_period('W').start_time,   # begin of the current week
        cur_day + MonthBegin(-1),            # begin of the current month
        cur_day + QuarterBegin(-1),          # begin of the current quarter
        cur_day + MonthBegin(-6),            # six month-begins back
        cur_day + YearBegin(-1),             # begin of the current year
        cur_day + YearBegin(-2)              # begin of the previous year
    ]
    returns = list(map(lambda x: self.ls_range_pct(x, cur_day), dates)) + \
        [self.ls_annual_return, self.ls_total_return]
    return pd.DataFrame([returns], columns=[
        '日回报', '本周以来', '本月以来', '本季以来', '近6个月', '今年以来', '近两年', '年化回报', '成立以来'
    ])
def _split_by_year(tile, time_dim='time'):
    """Split *tile* along its time dimension into calendar-year slices.

    Args:
        tile: tile object with a ``sources`` array indexed by ``time_dim``
            and a ``geobox`` attribute.
        time_dim: name of the time dimension to slice on.

    Yields:
        (year string "YYYY", Tile restricted to that calendar year) pairs.
    """
    start_range = tile.sources[time_dim][0].data
    end_range = tile.sources[time_dim][-1].data
    # Iterate over year starts ('AS'), beginning at the year-begin at or
    # before the first source timestamp (rollback).
    for date in pd.date_range(start=YearBegin(normalize=True).rollback(start_range),
                              end=end_range,
                              freq='AS',
                              normalize=True):
        # Slice from this year-begin up to the matching year-end (inclusive).
        sources_slice = tile.sources.loc[
            {time_dim: slice(date, YearEnd(normalize=True).rollforward(date))}]
        year_str = '{0:%Y}'.format(date)
        yield year_str, Tile(sources=sources_slice, geobox=tile.geobox)
def to_offset(self) -> DateOffset:
    """Translate this frequency's string value into a pandas ``DateOffset``.

    Returns:
        The offset matching ``self.value`` ("H", "D", "W-MON", "MS",
        "QS-DEC" or "AS").

    Raises:
        NotImplementedError: for any other frequency string.
    """
    mapping = {
        "H": Hour(1),
        "D": Day(1),
        "W-MON": Week(1, weekday=0),
        "MS": MonthBegin(1),
        "QS-DEC": QuarterBegin(startingMonth=10),
        "AS": YearBegin(1),
    }
    offset = mapping.get(self.value)
    if offset is None:
        raise NotImplementedError(self.value)
    return offset
def timestamp_rollforward_rollback():
    """Demonstrate rolling dates forward/backward with pandas offsets.

    Prints the effect of adding days, rolling forward to a month end,
    rolling back to a month begin, and grouping a time series by the
    year-begin each timestamp rolls forward to. Output only; returns None.
    """
    # NOTE: the original used Python 2 print statements, which are a
    # SyntaxError under Python 3; converted to print() calls.
    now = datetime(2014, 4, 15)
    print("Current time is:", now)

    now = now + 3 * Day()
    print("Adding 3 days to now:", now)

    offset = MonthEnd()
    now = offset.rollforward(now)
    print("Rolling forward to last day of the month", now)

    offset = MonthBegin()
    # rollback moves to the first day of the month at or before `now`.
    now = offset.rollback(now)
    print("Rolling back to first day of the month", now)

    ts = pd.Series(np.random.randn(20),
                   index=pd.date_range('1/1/2000', periods=20, freq='4d'))
    print("Original Time Series is:\n", ts)

    offset = YearBegin()
    # Group each timestamp by the year-begin it rolls forward to.
    ts = ts.groupby(offset.rollforward).mean()
    print("Time Series after rolling forward\n", ts)
def get_dividends(start, end, **kwargs):
    """Download per-share cash dividends and store them in the HDF5 store.

    The query window is widened to whole calendar years ([year-begin of
    start, year-end of end]) before calling the uqer "EquDivGet" API.

    Args:
        start: start date (anything ``pd.to_datetime`` accepts)
        end: end date (anything ``pd.to_datetime`` accepts)
        **kwargs: unused; kept for interface compatibility with sibling
            fetchers — TODO confirm against callers.
    """
    start = (pd.to_datetime(start) - YearBegin(1)).strftime("%Y%m%d")
    end = (pd.to_datetime(end) + YearEnd(1)).strftime("%Y%m%d")
    raw_dividends = uqer_db.run_api(
        "EquDivGet",
        beginDate=start,
        endDate=end,
        field=["endDate", "ticker", "publishDate", "perCashDiv"])
    raw_dividends.dropna(inplace=True)
    # Dates arrive as 'YYYY-MM-DD' strings; normalize to int YYYYMMDD.
    raw_dividends['endDate'] = (raw_dividends['endDate'].str.replace(
        '-', '')).astype('int')
    raw_dividends['publishDate'] = (raw_dividends['publishDate'].str.replace(
        '-', '')).astype('int')
    raw_dividends['ticker'] = raw_dividends['ticker'].astype('int')
    raw_dividends.sort_values(['ticker', 'endDate', 'publishDate'],
                              inplace=True)
    # Rename to the store's canonical column names.
    raw_dividends.rename(columns={
        'ticker': 'IDs',
        'endDate': 'date',
        'publishDate': 'ann_dt',
        'perCashDiv': 'dividend'
    }, inplace=True)
    h5db.save_h5file(raw_dividends, 'cash_div', '/dividends/')
class TestYearBegin(Base):
    """Offset arithmetic and on-offset tests for ``YearBegin``."""

    _offset = YearBegin

    def test_misspecified(self):
        # The constructor must reject an out-of-range anchor month.
        with pytest.raises(ValueError, match="Month must go from 1 to 12"):
            YearBegin(month=13)

    # (offset, {input datetime -> expected result}) pairs for test_offset.
    offset_cases = []
    offset_cases.append((YearBegin(), {
        datetime(2008, 1, 1): datetime(2009, 1, 1),
        datetime(2008, 6, 30): datetime(2009, 1, 1),
        datetime(2008, 12, 31): datetime(2009, 1, 1),
        datetime(2005, 12, 30): datetime(2006, 1, 1),
        datetime(2005, 12, 31): datetime(2006, 1, 1)}))

    offset_cases.append((YearBegin(0), {
        datetime(2008, 1, 1): datetime(2008, 1, 1),
        datetime(2008, 6, 30): datetime(2009, 1, 1),
        datetime(2008, 12, 31): datetime(2009, 1, 1),
        datetime(2005, 12, 30): datetime(2006, 1, 1),
        datetime(2005, 12, 31): datetime(2006, 1, 1)}))

    offset_cases.append((YearBegin(3), {
        datetime(2008, 1, 1): datetime(2011, 1, 1),
        datetime(2008, 6, 30): datetime(2011, 1, 1),
        datetime(2008, 12, 31): datetime(2011, 1, 1),
        datetime(2005, 12, 30): datetime(2008, 1, 1),
        datetime(2005, 12, 31): datetime(2008, 1, 1)}))

    # NOTE(review): datetime(2007, 1, 1) appears twice in this literal with
    # the same expected value; the duplicate key silently replaces the first.
    offset_cases.append((YearBegin(-1), {
        datetime(2007, 1, 1): datetime(2006, 1, 1),
        datetime(2007, 1, 15): datetime(2007, 1, 1),
        datetime(2008, 6, 30): datetime(2008, 1, 1),
        datetime(2008, 12, 31): datetime(2008, 1, 1),
        datetime(2006, 12, 29): datetime(2006, 1, 1),
        datetime(2006, 12, 30): datetime(2006, 1, 1),
        datetime(2007, 1, 1): datetime(2006, 1, 1)}))

    offset_cases.append((YearBegin(-2), {
        datetime(2007, 1, 1): datetime(2005, 1, 1),
        datetime(2008, 6, 30): datetime(2007, 1, 1),
        datetime(2008, 12, 31): datetime(2007, 1, 1)}))

    # Cases with a non-January anchor month (fiscal-year style).
    offset_cases.append((YearBegin(month=4), {
        datetime(2007, 4, 1): datetime(2008, 4, 1),
        datetime(2007, 4, 15): datetime(2008, 4, 1),
        datetime(2007, 3, 1): datetime(2007, 4, 1),
        datetime(2007, 12, 15): datetime(2008, 4, 1),
        datetime(2012, 1, 31): datetime(2012, 4, 1)}))

    offset_cases.append((YearBegin(0, month=4), {
        datetime(2007, 4, 1): datetime(2007, 4, 1),
        datetime(2007, 3, 1): datetime(2007, 4, 1),
        datetime(2007, 12, 15): datetime(2008, 4, 1),
        datetime(2012, 1, 31): datetime(2012, 4, 1)}))

    offset_cases.append((YearBegin(4, month=4), {
        datetime(2007, 4, 1): datetime(2011, 4, 1),
        datetime(2007, 4, 15): datetime(2011, 4, 1),
        datetime(2007, 3, 1): datetime(2010, 4, 1),
        datetime(2007, 12, 15): datetime(2011, 4, 1),
        datetime(2012, 1, 31): datetime(2015, 4, 1)}))

    offset_cases.append((YearBegin(-1, month=4), {
        datetime(2007, 4, 1): datetime(2006, 4, 1),
        datetime(2007, 3, 1): datetime(2006, 4, 1),
        datetime(2007, 12, 15): datetime(2007, 4, 1),
        datetime(2012, 1, 31): datetime(2011, 4, 1)}))

    offset_cases.append((YearBegin(-3, month=4), {
        datetime(2007, 4, 1): datetime(2004, 4, 1),
        datetime(2007, 3, 1): datetime(2004, 4, 1),
        datetime(2007, 12, 15): datetime(2005, 4, 1),
        datetime(2012, 1, 31): datetime(2009, 4, 1)}))

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        # Each case checks offset application for every base/expected pair.
        offset, cases = case
        for base, expected in compat.iteritems(cases):
            assert_offset_equal(offset, base, expected)

    # (offset, datetime, expected is-on-offset flag) triples.
    on_offset_cases = [(YearBegin(), datetime(2007, 1, 3), False),
                       (YearBegin(), datetime(2008, 1, 1), True),
                       (YearBegin(), datetime(2006, 12, 31), False),
                       (YearBegin(), datetime(2006, 1, 2), False)]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        offset, dt, expected = case
        assert_onOffset(offset, dt, expected)
def test_misspecified(self):
    """YearBegin must reject an anchor month outside 1..12."""
    with pytest.raises(ValueError, match="Month must go from 1 to 12"):
        YearBegin(month=13)
def create_data():
    """ create the pickle/msgpack data

    Builds a nested dict of representative pandas objects (Series, DataFrame,
    Index, MultiIndex, Categorical, Timestamp and DateOffset instances) used
    as round-trip fixtures for serialization-compatibility tests.
    """
    # Column data exercising floats (with NaN), ints, strings, datetimes
    # and a mixed-dtype column.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }
    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )
    index["range"] = RangeIndex(10)
    # interval_range only exists from pandas 0.21 onwards.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)
    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))
    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"]),
        ),
        # 'dup' has a duplicated index label on purpose.
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )
    # Frame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )
    # Categoricals whose code arrays need int8/int16/int32 storage.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )
    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")
    # One instance of each supported DateOffset subclass.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
def time_for_next_update(last_time, freq='D', num=9, is_end=False):
    """Next update time after the previous update.

    Arguments:
        last_time {obj} -- time of the previous update

    Keyword Arguments:
        freq {str} -- update period (default: {'D'})
        num {int} -- number of periods for sub-daily freqs; hour of day for
            daily and above (default: {9})
        is_end {bool} -- whether to anchor to the end of the period
            (default: {False})

    Raises:
        TypeError: unrecognized period type

    Returns:
        Timestamp -- the next update time

    Notes:
        1. freq < D: `num` is a period count; the previous time is
           normalized and shifted by `num` periods, ignoring begin/end.
        2. freq in (D, B): `num` is an hour of day. For historical times the
           previous time is normalized and moved to the next period with the
           hour set to `num`; if the previous time falls on today and the
           normalized/hour-adjusted value is later than it, that adjusted
           value is returned.
        3. freq > D: only begin/end is meaningful and `num` has no effect —
           e.g. week/month/quarter/year begin or end.
    """
    valid_freq = ('B', 'D', 'W', 'M', 'Q', 'H', 'MIN')
    # A null previous time means "never updated": restart from market open.
    if pd.isnull(last_time):
        return pd.Timestamp(MARKET_START)
    assert isinstance(
        last_time, pd.Timestamp), f'类型错误,希望Timestamp,实际为{type(last_time)}'
    now = pd.Timestamp.now(tz=last_time.tz)
    assert last_time <= now, '过去时间必须小于当前时间'
    freq = freq.upper()
    if freq == 'MIN':
        offset = Minute(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'H':
        offset = Hour(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'D':
        # Must account for the hour-of-day here.
        limit = last_time.floor(freq).replace(hour=num)
        if last_time < limit:
            return limit
        else:
            offset = Day()
            return offset.apply(last_time.floor(freq)).replace(hour=num)
    if freq == 'B':
        offset = BDay()
        # Business day (Mon-Fri).
        if last_time.weekday() in range(0, 5):
            # Must account for the hour-of-day here.
            limit = last_time.normalize().replace(hour=num)
            if last_time < limit:
                return limit
            else:
                return offset.apply(last_time.normalize()).replace(hour=num)
        else:
            return offset.apply(last_time.normalize()).replace(hour=num)
    if freq == 'W':
        nw = last_time.normalize() + pd.Timedelta(weeks=1)
        if is_end:
            # End of next week, one nanosecond before the following Monday.
            return nw + pd.Timedelta(days=7-nw.weekday()) - pd.Timedelta(nanoseconds=1)
        else:
            # Monday of next week.
            return nw - pd.Timedelta(days=nw.weekday())
    if freq == 'M':
        if is_end:
            offset = MonthEnd(n=2)
            res = offset.apply(last_time.normalize())
            # If already on a month end, MonthEnd(2) overshoots; roll back.
            if last_time.is_month_end:
                res = offset.rollback(res)
            return res
        else:
            offset = MonthBegin()
            return offset.apply(last_time.normalize())
    if freq == 'Q':
        if is_end:
            offset = QuarterEnd(n=2, startingMonth=3, normalize=True)
            res = offset.apply(last_time)
            # If already on a quarter end, step one quarter back.
            if last_time.is_quarter_end:
                offset = QuarterEnd(n=-1, startingMonth=3, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = QuarterBegin(n=1, normalize=True, startingMonth=1)
            return offset.apply(last_time)
    if freq == 'Y':
        # Same calendar year as now: clamp to this year's begin/end.
        if last_time.year == now.year:
            if is_end:
                return last_time.normalize().replace(year=now.year, month=12, day=31)
            else:
                return last_time.normalize().replace(year=now.year, month=1, day=1)
        if is_end:
            offset = YearEnd(normalize=True, month=12, n=2)
            res = offset.apply(last_time)
            # If already on a year end, step one year back.
            if last_time.is_year_end:
                offset = YearEnd(n=-1, month=12, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = YearBegin(normalize=True, month=1, n=1)
            return offset.apply(last_time)
    raise ValueError('不能识别的周期类型,仅接受{}。实际输入为{}'.format(
        valid_freq, freq))
#! coding: utf-8 from django.conf import settings from pandas.tseries.offsets import YearBegin, QuarterBegin, MonthBegin, Day # Transformaciones VALUE = 'value' CHANGE = 'change' PCT_CHANGE = 'percent_change' CHANGE_YEAR_AGO = 'change_a_year_ago' PCT_CHANGE_YEAR_AGO = 'percent_change_a_year_ago' CHANGE_BEG_YEAR = 'change_since_beginning_of_year' PCT_CHANGE_BEG_YEAR = 'percent_change_since_beginning_of_year' # Pandas freqs PANDAS_YEAR = YearBegin() PANDAS_SEMESTER = MonthBegin(6) PANDAS_QUARTER = QuarterBegin(startingMonth=1) PANDAS_MONTH = MonthBegin() PANDAS_WEEK = Day(7) PANDAS_DAY = Day() # Frecuencias *en orden* de mayor a menor PANDAS_FREQS = [ PANDAS_YEAR, PANDAS_SEMESTER, PANDAS_QUARTER, PANDAS_MONTH, PANDAS_WEEK, PANDAS_DAY ] IDENTIFIER = "identifier" DATASET_IDENTIFIER = "dataset_identifier" DOWNLOAD_URL = "downloadURL"
def bin_df(df):
    """Count rows of *df* per calendar year of the '$date_to' column.

    Re-indexes *df* in place by the year-begin of each row's '$date_to'
    value (assumes the column is datetime-like — TODO confirm with callers),
    then returns the per-year non-null counts of every column.
    """
    anchored = pd.DatetimeIndex(df['$date_to'] - YearBegin(1),
                                ambiguous='infer')
    # NOTE: mutates the caller's frame — the index is replaced in place.
    df.index = anchored.floor('D')
    return df.groupby(level=0).count()
def create_data():
    """create the pickle data

    Builds a nested dict of representative pandas objects (Series, DataFrame,
    Index, MultiIndex, Categorical, Timestamp and DateOffset instances) used
    as round-trip fixtures for pickle-compatibility tests.
    """
    # Column data exercising floats (with NaN), ints, strings, datetimes
    # and a mixed-dtype column.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }
    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }
    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }
    index["range"] = RangeIndex(10)
    index["interval"] = interval_range(0, periods=10)
    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }
    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(np.arange(10).astype(np.int64),
                     index=date_range("20130101", periods=10)),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"]),
        ),
        # 'dup' has a duplicated index label on purpose.
        "dup": Series(np.arange(5).astype(np.float64),
                      index=["A", "B", "C", "D", "A"]),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }
    # Frame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float": DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int": DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=["A", "B", "A"]),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }
    # Categoricals whose code arrays need int8/int16/int32 storage.
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }
    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")
    # One instance of each supported DateOffset subclass.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }
    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
from flask import Flask, render_template, request, make_response
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay, YearEnd, YearBegin
from decimal import Decimal
import requests
from bs4 import BeautifulSoup
import base64
import json

app = Flask(__name__)

# Anchor dates for the current calendar year around "today".
today = pd.to_datetime("today")
todays_date = str(today).split(" ")[0][5:]  # "MM-DD" portion of the ISO date
yearBegin = today - YearBegin()
yearEnd = today + YearEnd()

# Business-day frequency that skips US federal holidays.
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
holidays = USFederalHolidayCalendar().holidays(start=today, end=yearEnd,
                                               return_name=True)
holidayDates = holidays.index

# FIX: the pd.DatetimeIndex(start=..., end=..., freq=...) constructor was
# deprecated in pandas 0.24 and removed in 1.0; pd.date_range is the
# supported, equivalent way to build these ranges.
workable_days_total = Decimal(
    len(pd.date_range(start=yearBegin, end=yearEnd, freq=us_bd)))
worked_days_todate = Decimal(
    len(pd.date_range(start=yearBegin, end=today, freq=us_bd)))
# Percentage of the year's business days already worked.
progress = round((worked_days_todate / workable_days_total) * 100, 0)
workable_days_remaining = Decimal(
    len(pd.date_range(start=today, end=yearEnd, freq=us_bd)))
days_off = ""
def create_data():
    """ create the pickle/msgpack data

    Builds a nested dict of representative pandas objects, guarded by
    version checks so the same generator runs against several legacy
    pandas releases.
    """
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }
    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))
    # RangeIndex appeared in 0.18, interval_range in 0.21.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)
    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
        names=['first', 'second']))
    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                names=['one', 'two'])),
                  # 'dup' has a duplicated index label on purpose.
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))
    # Frame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
                     'A': series['float'],
                     'B': series['float'] + 1
                 }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                         names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A': Categorical(['foo', 'bar', 'baz']),
                     'B': np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     }, index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     }, index=range(5)))
    # Categoricals whose code arrays need int8/int16/int32 storage.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))
    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))
    # The keyword was renamed offset -> freq in 0.19.2.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')
    # One instance of each supported DateOffset subclass.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }
    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def create_data():
    """ create the pickle/msgpack data

    Python-2-era variant of the fixture generator: uses explicit u''
    literals and still builds (deprecated) Panel objects under a
    warnings guard.
    """
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }
    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))
    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux',
               u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
        names=[u'first', u'second']))
    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                      names=[u'one', u'two'])),
        # 'dup' has a duplicated index label on purpose.
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))
    # Frame with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            }, index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            }, index=range(5)))
    # Panel was deprecated, so silence its warnings while constructing.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        # Duplicate item label on purpose.
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)
    # Categoricals whose code arrays need int8/int16/int32 storage.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))
    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))
    # The keyword was renamed offset -> freq in 0.19.2.
    if _loose_version < '0.19.2':
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')
    # One instance of each DateOffset subclass supported at this version.
    off = {
        'DateOffset': DateOffset(years=1),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }
    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def tickValues(self, minVal, maxVal, size):
    """Choose major tick positions for a date axis between two timestamps.

    Picks the coarsest frequency (year .. second) that yields at least
    ``self.minticks`` ticks, then the smallest interval at that frequency
    that stays under the per-frequency tick limit, and generates the major
    ticks with pandas ``date_range``.

    Args:
        minVal, maxVal: axis bounds as POSIX timestamps (seconds).
        size: unused here — presumably the axis pixel size; kept for the
            caller's locator interface (TODO confirm).

    Returns:
        List of (spacing, values) pairs; major tick values are epoch
        seconds, the minor list is left empty.
    """
    minVal, maxVal = sorted((minVal, maxVal))
    # upon opening, we don't want any tick values
    if minVal == 0 and maxVal == 1:
        return [(0, []), (0, [])]
    self._freqs = ['YEARLY', 'MONTHLY', 'DAILY', 'HOURLY', 'MINUTELY',
                   'SECONDLY']
    self.minticks = 5
    self.maxticks = {'YEARLY': 11, 'MONTHLY': 12, 'DAILY': 11, 'HOURLY': 12,
                     'MINUTELY': 11, 'SECONDLY': 11}
    self.interval_multiples = True
    # Candidate step sizes tried per frequency, smallest first.
    self.intervald = {
        'YEARLY': [1, 2, 4, 5, 10, 20, 40, 50, 100],
        'MONTHLY': [1, 2, 3, 4, 6],
        'DAILY': [1, 2, 3, 7, 14],
        'HOURLY': [1, 2, 3, 4, 6, 12],
        'MINUTELY': [1, 5, 10, 15, 30],
        'SECONDLY': [1, 5, 10, 15, 30],
    }
    minDate = datetime.fromtimestamp(minVal)
    maxDate = datetime.fromtimestamp(maxVal)
    delta = relativedelta(maxDate, minDate)
    # Approximate span expressed in each unit (months assumed 31 days).
    numYears = (delta.years * 1.0)
    numMonths = (numYears * 12.0) + delta.months
    numDays = (numMonths * 31.0) + delta.days
    numHours = (numDays * 24.0) + delta.hours
    numMinutes = (numHours * 60.0) + delta.minutes
    numSeconds = (numMinutes * 60.0) + delta.seconds
    numMicroseconds = (numSeconds * 1e6) + delta.microseconds
    nums = [numYears, numMonths, numDays, numHours, numMinutes, numSeconds,
            numMicroseconds]
    # NOTE(review): izip is Python-2 itertools; under Python 3 this would be
    # plain zip.
    for (freq, num) in izip(self._freqs, nums):
        # If this particular frequency doesn't give enough ticks, continue
        if num < self.minticks:
            continue
        # Find the first available interval that doesn't give too many
        # ticks
        for interval in self.intervald[freq]:
            if num <= interval * (self.maxticks[freq] - 1):
                break
        else:
            # We went through the whole loop without breaking, default to
            # the last interval in the list and raise a warning
            warnings.warn('AutoDateLocator was unable to pick an '
                          'appropriate interval for this date range. '
                          'It may be necessary to add an interval value '
                          "to the AutoDateLocator's intervald dictionary."
                          ' Defaulting to {0}.'.format(interval))
        # Set some parameters as appropriate
        self._freq = freq
        break
    else:
        raise ValueError('No sensible date limit could be found')
    # Start from midnight of the first day, backed up one period so the
    # first tick lands at or before the axis start.
    baseDate = datetime(minDate.year, minDate.month, minDate.day)
    if freq == 'YEARLY':
        offset = DateOffset(years=interval)
        majorTicks = date_range(baseDate + YearBegin(-1), maxDate,
                                freq=offset)
    if freq == 'MONTHLY':
        offset = DateOffset(months=interval)
        majorTicks = date_range(baseDate + MonthBegin(-1), maxDate,
                                freq=offset)
    if freq == 'WEEKLY':
        offset = DateOffset(weeks=interval)
        majorTicks = date_range(baseDate + DateOffset(days=-1), maxDate,
                                freq=offset)
    if freq == 'DAILY':
        offset = DateOffset(days=interval)
        majorTicks = date_range(baseDate + DateOffset(days=-1), maxDate,
                                freq=offset)
    if freq == 'HOURLY':
        offset = DateOffset(hours=interval)
        majorTicks = date_range(baseDate + DateOffset(days=-1), maxDate,
                                freq=offset)
    # NOTE(review): 'MINUTELY'/'SECONDLY' have no branch here, and 'WEEKLY'
    # is handled but never selected (not in self._freqs) — confirm intended.
    majorTicks = majorTicks.tz_localize('US/Eastern')
    # Convert nanosecond epoch values back to seconds for the axis.
    ticks = [(0, majorTicks.asi8 / 1e9), (0, [])]
    return ticks
'A-JAN' : YearEnd(month=1), 'A-FEB' : YearEnd(month=2), 'A-MAR' : YearEnd(month=3), 'A-APR' : YearEnd(month=4), 'A-MAY' : YearEnd(month=5), 'A-JUN' : YearEnd(month=6), 'A-JUL' : YearEnd(month=7), 'A-AUG' : YearEnd(month=8), 'A-SEP' : YearEnd(month=9), 'A-OCT' : YearEnd(month=10), 'A-NOV' : YearEnd(month=11), 'A-DEC' : YearEnd(month=12), 'A' : YearEnd(month=12), # Annual - Calendar (start) 'AS-JAN' : YearBegin(month=1), 'AS' : YearBegin(month=1), 'AS-FEB' : YearBegin(month=2), 'AS-MAR' : YearBegin(month=3), 'AS-APR' : YearBegin(month=4), 'AS-MAY' : YearBegin(month=5), 'AS-JUN' : YearBegin(month=6), 'AS-JUL' : YearBegin(month=7), 'AS-AUG' : YearBegin(month=8), 'AS-SEP' : YearBegin(month=9), 'AS-OCT' : YearBegin(month=10), 'AS-NOV' : YearBegin(month=11), 'AS-DEC' : YearBegin(month=12), # Annual - Business 'BA-JAN' : BYearEnd(month=1),
return jsonify(data=data, x=cols[0], y=cols[1], stats=stats) except BaseException as e: return jsonify( dict(error=str(e), traceback=str(traceback.format_exc()))) DATE_RANGES = { 'W': lambda today: today - Day(today.dayofweek), 'M': lambda today: today if today.is_month_start else today - MonthBegin(), 'Q': lambda today: today if today.is_quarter_start else today - QuarterBegin(startingMonth=1), 'Y': lambda today: today if today.is_year_start else today - YearBegin(), } @dtale.route('/coverage') @swag_from('swagger/dtale/views/coverage.yml') def find_coverage(): """ Flask route which returns coverage information(counts) for a column grouped by other column(s) :param query: string from flask.request.args['query'] which is applied to DATA using the query() function :param col: string from flask.request.args['col'] containing name of a column in your dataframe :param filters(deprecated): JSON string from flaks.request.args['filters'] with filtering information from group drilldown [ {name: col1, prevFreq: Y, freq: Q, date: YYYY-MM-DD}, ...