def setUp(self):
    """Prepare the fixture: a business-day offset that skips US federal holidays.

    The offset is stored on the instance as ``self.bday``.
    """
    federal_calendar = USFederalHolidayCalendar()
    self.bday = CustomBusinessDay(calendar=federal_calendar)
day=1, offset=[Easter(), Day(-47)]), # Sexta feira Santa(GoodFriday) GoodFriday, Holiday('Tiradentes', month=4, day=21), Holiday('Dia do Trabalho', month=5, day=1), # Corpus Christi (60 dias após a Páscoa, ou 62 dias após a Sexta Feira Santa) Holiday('Corpus Christi', month=1, day=1, offset=[Easter(), Day(60)]), Holiday('Independência do Brasil', month=9, day=7), Holiday('Nossa Senhora Aparecida - Padroeira do Brasil', month=10, day=12), Holiday('Finados', month=11, day=2), Holiday('Proclamação da República', month=11, day=15), Holiday('Natal', month=12, day=25) ] ################# Exemplos ######################### br_feriados = CustomBusinessDay(calendar=Feriados_Brasil()) inst = Feriados_Brasil() # Gerar lista de feriados de 2001 até final de 2079 (mesmo período disponibilizado pela Anbima) lista_feriados = inst.holidays(dt.datetime(2000, 12, 31), dt.datetime(2079, 12, 31)) # Se uma data não é dia útil, por exemplo 01/01/2010, determinar o próximo dia útil pŕoximo_dia_util = date(2010, 1, 1) + br_feriados
def create_data():
    """Create the pickle/msgpack data.

    Builds one representative object for each kind of pandas container
    (scalars, indexes, series, frames, panels, categoricals, timestamps and
    date offsets) and returns them grouped in a nested dict keyed by kind.
    The sparse entries come from the ``_create_sp_*`` helpers defined
    elsewhere in this module.
    """
    # Raw column data shared by several of the objects below.
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))

    # Two-level MultiIndex; zip(*...) turns the two level lists into tuples.
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])), names=[u'first', u'second']))

    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                      names=[u'one', u'two'])),
        # 'A' appears twice on purpose: a series with a duplicated label.
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    # Frame with mixed dtypes whose columns are relabelled so 'A' is duplicated.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")

    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        # Columns carrying different time zones within one frame.
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    # Warnings raised while the Panel objects are built are recorded rather
    # than shown (catch_warnings(record=True)).
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        # Relabel so that both items carry the same name.
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
            dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                      items=[u'A', u'B', u'A']),
            mixed_dup=mixed_dup_panel)

    # The key names mirror the category sizes (7, 1000 and 10000 categories).
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # Versions before 0.19.2 take the frequency through the ``offset``
    # keyword; later versions take it through ``freq``.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                      freq='M')

    # One instance of each date-offset class, some with non-default arguments.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
import pandas as pd from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay import numpy as np from matplotlib import pyplot as pt df = pd.read_csv( r"C:\Users\Ritish Adhikari\Desktop\CSV File\aapl_no_dates.csv") rows, columns = df.shape #print("Printing the original DF Dataframe") #print(df) print() print( "------------Putting a date range to the DF Dataframe with US holiday list and printing the dataframe with date time index---------" ) USB = CustomBusinessDay(calendar=USFederalHolidayCalendar()) dt = pd.date_range(start='2018-07-01', periods=rows, freq=USB) df.set_index(dt, inplace=True) print(df) print() print( "----------------------Printing Indian Calendar Now-------------------------------" ) from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday class MyBirthdayCalendar(AbstractHolidayCalendar): rules = [
from strategy import Strategy from backtest import Bt from statistic import indicator, index_accumulate_return import pandas as pd from typing import Callable import matplotlib.pyplot as plt from pandas.tseries.offsets import BDay from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay from data import Data US_BUSINESS_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar()) class Portfolio: def __init__(self, start_date, end_date): self.data = Data().data self.start_date = start_date self.end_date = end_date def run(self, hold_days: int, strategy: Callable): assert isinstance(hold_days, int), 'the type of hold_dates should be int.' signal_dict = self._strategy_ticker_signal(strategy) # {ticker: } ret_output = pd.Series() log_df = pd.DataFrame()
Holiday("Independence Day", month=7, day=15, observance=nearest_workday), Holiday("Veterans Day", month=8, day=14, observance=nearest_workday), Holiday("New Years Day", month=8, day=15, observance=nearest_workday), Holiday("New Years Day", month=8, day=16, observance=nearest_workday), Holiday("'体育の日", month=10, day=14, observance=nearest_workday), Holiday("New Years Day", month=12, day=30, observance=nearest_workday), Holiday("Black Friday", month=12, day=31, observance=nearest_workday), ] # umc-workingday # #using CustomBusinessDay cal = CustomBusinessDay(calendar=myCalendar()) #start to end frequency with calling custombusinessday s = pd.date_range('2019-01-01', '2019-12-31', freq=cal) #date format sumc = s.strftime('%Y-%m-%d') #extra date (saturday and sunday ) s1 = ['2019-12-28'] #Calling DataFrame constructor on s df = pd.DataFrame(sumc, columns=['umc']) #Calling DataFrame constructor on s1 df1 = pd.DataFrame(s1, columns=['umc']) #concatenating df and df1 on Dataframe data = pd.concat([df, df1], ignore_index=True) # converting to list umcdata = {'umc-workingday': data["umc"].tolist()}
import MySQLdb as mariadb
from pymongo import MongoClient
from sqlalchemy import create_engine, Table, Column, MetaData
from sqlalchemy_utils import database_exists, create_database
from contextlib import contextmanager
from data import *

# Duplicated from utils
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.offsets import CustomBusinessMonthBegin

# One shared calendar instance feeds both custom offsets below.
US_Federal_Calendar = USFederalHolidayCalendar()
# First business day of the month / single business day, both skipping the
# holidays of the calendar above.
bmth_us = CustomBusinessMonthBegin(calendar=US_Federal_Calendar)
bday_us = CustomBusinessDay(calendar=US_Federal_Calendar)

# NOTE(review): ``logging`` is not imported in the lines visible here; it
# presumably arrives through ``from data import *`` -- confirm.
logging.basicConfig()
# Only warnings and above are emitted by the root logger.
logging.getLogger().setLevel(logging.WARN)

# Maps a dtype name (key) to a SQL column type (value).
# NOTE(review): pandas reports datetime and timedelta columns as
# 'datetime64[ns]' and 'timedelta64[ns]'; the keys 'datetime64' and
# 'timedelta[ns]' would not match those strings on an exact lookup -- confirm
# how this mapping is consulted.
Dtype_Mapping = {
    'object': 'TEXT',
    'int64': 'INT',
    'float64': 'FLOAT',
    'datetime64': 'DATETIME',
    'bool': 'TINYINT',
    'category': 'TEXT',
    'timedelta[ns]': 'TEXT'
}
#HOLIDAYS import pandas as pd import numpy as np from datetime import datetime from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday from pandas.tseries.offsets import CustomBusinessDay df = pd.read_csv('appl_no_dates.csv') print(df) rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq='B') print(rng) us_cal = CustomBusinessDay(calendar=USFederalHolidayCalendar()) rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq=us_cal) print(rng) df.set_index(rng, inplace=True) print(df) class myCalendar(AbstractHolidayCalendar): rules = [ Holiday('My Birth Day', month=4, day=12) #, observance=nearest_workday), ] my_bday = CustomBusinessDay(calendar=myCalendar()) print(pd.date_range('4/1/2017','4/30/2017',freq=my_bday)) print(pd.date_range(start='4/1/2017', end='4/30/2017',freq=my_bday))
plt.legend() plt.show() ''' # Creating a DataFrame containing the timeseries data along with the pca features data.iloc[:, 1:3] = X[:, 0:2] ts = data.iloc[:, 0:3] ts.columns = ['InvoiceDate', 'PCA1', 'PCA2'] ts.index = ts.InvoiceDate ts = ts.iloc[:, 1:3] #ts1 = ts.drop_duplicates() UBO = ts from pandas.tseries.offsets import CustomBusinessDay week_mask = 'Mon' bo1 = CustomBusinessDay(weekmask=week_mask) BOM = ts.asfreq(freq=bo1, method='ffill') # Applying PCA from sklearn.decomposition import PCA pca1 = PCA( n_components=1 ) # first run with the n_components=None , then after seeing the explained variance, choose the number BOM = pca1.fit_transform(BOM) explained_variance = pca1.explained_variance_ratio_ plt.plot(BOM, label='Monday') plt.title('Biased Observer - Monday 2') plt.legend('M') plt.figure('1') from pandas.tseries.offsets import CustomBusinessDay week_mask = 'Tue'
def get_holidays(dt_start, dt_end):
    """Return the holidays of the ``BrazilHolidays`` calendar in a date range.

    Args:
        dt_start: Start of the range, forwarded to ``BrazilHolidays.holidays``.
        dt_end: End of the range, forwarded to ``BrazilHolidays.holidays``.

    Returns:
        The result of ``BrazilHolidays().holidays(dt_start, dt_end)``.
    """
    # The holiday list comes straight from the calendar. The business-day
    # offset that used to be built here was never used, so it is no longer
    # constructed.
    return BrazilHolidays().holidays(dt_start, dt_end)
def __setstate__(self, state):
    """Restore the instance from a pickled ``state`` mapping.

    The ``"business_days"`` entry is removed from ``state`` and stored on the
    instance; whatever remains is handed to ``CustomBusinessDay.__setstate__``.
    ``state`` is mutated by the ``pop``, and a missing ``"business_days"``
    key raises ``KeyError``.
    """
    # Must run first so the parent only sees the keys left after the pop.
    self.business_days = state.pop("business_days")
    CustomBusinessDay.__setstate__(self, state)
def day(self):
    """Build the business-day offset for a Monday-to-Friday week.

    The ad-hoc holidays and the regular holiday calendar are read from the
    instance and passed on to ``CustomBusinessDay``.
    """
    offset_kwargs = {
        "holidays": self.adhoc_holidays,
        "calendar": self.regular_holidays,
        "weekmask": "Mon Tue Wed Thu Fri",
    }
    return CustomBusinessDay(**offset_kwargs)
def __init__(self):
    """Create the business-day offset and the close/open time delta.

    Reads ``self.calendar``, ``self.weekmask``, ``self.close_time`` and
    ``self.open_time``. None of them is assigned here, so they are presumably
    class-level attributes -- TODO confirm.
    """
    # Business-day offset built from the instance's calendar and week mask.
    self.cbd = CustomBusinessDay(calendar=self.calendar, weekmask=self.weekmask)
    # timedelta_between is defined elsewhere; it is called with the close
    # time first and the open time second.
    self.delta = timedelta_between(self.close_time, self.open_time)
#%% import matplotlib.pyplot as plt import numpy as np import pandas as pd import json from datetime import date from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay import pytz import os cwd = os.getcwd() print(cwd) path = 'Senior-Thesis/' us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar(), normalize=True) def isWorkingDay(x): d = us_bd.rollback(date(x.year, x.month, x.day)) return d.day == x.day and d.month == x.month and d.year == x.year #%% with open(path+'data/tusonload.json') as f: datafile = json.load(f) ldf = pd.DataFrame(datafile['series'][0]['data'], columns=['date', 'load']) ldf.date = pd.to_datetime(ldf.date) #%% a = ldf['load'].isnull() b = a.cumsum() c = ldf['load'].bfill() d = c + (b-b.mask(a).bfill().fillna(0).astype(int)).sub(1)
# New Years have difference observance method because the preceeding # Friday is year end and exchange is open Holiday('New Years Day', month=1, day=1, observance=sunday_to_monday), USMartinLutherKingJr, USPresidentsDay, USMemorialDay, Holiday('July 4th', month=7, day=4, observance=nearest_workday), USLaborDay, GoodFriday, USThanksgivingDay, Holiday('Christmas', month=12, day=25, observance=nearest_workday) ] # US business day us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar()) # US financial business day fi_bd = CustomBusinessDay(calendar=USFinancialHolidayCalendar()) fi_holidays = USFinancialHolidayCalendar().holidays _ = fi_holidays('19500101', '20500101') # Activate the holiday for better efficiency # US Exchange bussiness hour fi_bh = CustomBusinessHour(calendar=USFederalHolidayCalendar(), start='9:30', end='16:30') def weekday_distance(t1, t2, convention='forward'): """ Number of weekdays between t1 and t2: t2 - t1 Rolling forward convention: from 0 am+ to 0 am+
import numpy as np import pandas as pd import patsy from pandas.tseries.offsets import CustomBusinessDay from pandas.tseries.holiday import USFederalHolidayCalendar as calendar import scipy import pytz import re utils.setup_logging() _log = logging.getLogger(__name__) UTC_TZ = pytz.timezone('UTC') WORKING_DIR = os.getcwd() __version__ = 0.1 HOLIDAYS = pd.to_datetime(CustomBusinessDay(calendar=calendar()).holidays) def is_weekend_holiday(start, end, tz): if start.astimezone(tz).date() in HOLIDAYS and \ end.astimezone(tz).date() in HOLIDAYS: return True if start.astimezone(tz).weekday() > 4 and \ end.astimezone(tz).weekday() > 4: return True return False def sort_list(lst): sorted_list = [] for item in lst:
def generate_and_update_futures_data_file_4tickerhead(**kwargs):
    """Build or refresh the pickled price file of one ticker head.

    Prices are fetched with ``gfp.get_futures_price_4ticker``. When a pickle
    for the ticker head already exists, only data from
    ``cu.doubledate_shift(last_available_date, 60)`` onwards is requested and
    the result is merged into the stored frame; otherwise the full history is
    requested. Rows flagged in ``dirty_data_points`` are dropped, every
    contract is re-indexed on the business days of the 'CL' calendar, and the
    forward/backward price-change columns are computed per contract. The
    result is written to ``<presaved_futures_data_folder>/<ticker_head>.pkl``.

    Keyword Args:
        ticker_head: Ticker head to process (required).
        con: Optional open database connection. If it is not supplied, the
            connection obtained here is closed before returning, including
            when an error is raised.
        Any other keyword is forwarded to ``msu.get_my_sql_connection``.

    Raises:
        KeyError: If ``ticker_head`` is not supplied.
    """
    ticker_head = kwargs['ticker_head']
    file_name = presaved_futures_data_folder + '/' + ticker_head + '.pkl'

    con = msu.get_my_sql_connection(**kwargs)

    try:
        # Decide once whether this is an update. Checking the file a second
        # time after the long computation could disagree with this check and
        # leave old_data undefined or silently drop the stored history.
        file_exists = os.path.isfile(file_name)

        if file_exists:
            old_data = pd.read_pickle(file_name)
            last_available_date = int(
                old_data['settle_date'].max().to_pydatetime().strftime('%Y%m%d'))
            date_from = cu.doubledate_shift(last_available_date, 60)
            data4_tickerhead = gfp.get_futures_price_4ticker(
                ticker_head=ticker_head, date_from=date_from, con=con)
        else:
            data4_tickerhead = gfp.get_futures_price_4ticker(
                ticker_head=ticker_head, con=con)

        # Remove the known bad data points: after the left merge, 'discard'
        # is True only for rows listed in dirty_data_points.
        data4_tickerhead = pd.merge(data4_tickerhead, dirty_data_points,
                                    on=['settle_date', 'ticker'], how='left')
        data4_tickerhead = data4_tickerhead[data4_tickerhead['discard'] != True]
        # axis must be given by keyword: the positional form was removed in
        # pandas 2.0.
        data4_tickerhead = data4_tickerhead.drop('discard', axis=1)

        # Prices may come back as None or non-float objects; force floats.
        for price_column in ('close_price', 'open_price',
                             'high_price', 'low_price'):
            data4_tickerhead[price_column] = [
                float(x) if x is not None else float('NaN')
                for x in data4_tickerhead[price_column].values
            ]

        # Sortable contract key: year * 100 + month.
        data4_tickerhead['cont_indx'] = 100 * data4_tickerhead[
            'ticker_year'] + data4_tickerhead['ticker_month']

        unique_cont_indx_list = np.sort(data4_tickerhead['cont_indx'].unique())

        bday_us = CustomBusinessDay(calendar=exp.get_calendar_4ticker_head('CL'))
        full_dates = pd.date_range(start=data4_tickerhead['settle_date'].min(),
                                   end=data4_tickerhead['settle_date'].max(),
                                   freq=bday_us)

        merged_dataframe_list = [
            _expand_contract_4tickerhead(
                data4_tickerhead[data4_tickerhead['cont_indx'] == cont_indx],
                full_dates)
            for cont_indx in unique_cont_indx_list
        ]

        data4_tickerhead = pd.concat(merged_dataframe_list)

        if file_exists:
            # past_indx marks rows whose backward-looking change is known;
            # frame_indx marks rows that come from the fresh download.
            data4_tickerhead['past_indx'] = [
                1 if np.isfinite(x) else 0
                for x in data4_tickerhead['change_5'].values
            ]
            clean_data = data4_tickerhead
            clean_data['frame_indx'] = 1

            # Remember the stored layout before the helper columns are added.
            data_columns = old_data.columns
            old_data['frame_indx'] = 0
            old_data['past_indx'] = [
                1 if np.isfinite(x) else 0
                for x in old_data['change_5'].values
            ]

            merged_data = pd.concat([old_data, clean_data],
                                    ignore_index=True, sort=True)
            # For every (contract, date) keep the row with a known past
            # change first and, among those, the freshly downloaded one.
            merged_data.sort_values(
                ['cont_indx', 'settle_date', 'past_indx', 'frame_indx'],
                ascending=[True, True, False, False],
                inplace=True)
            merged_data.drop_duplicates(subset=['settle_date', 'cont_indx'],
                                        keep='first', inplace=True)

            data4_tickerhead = merged_data.drop(['frame_indx', 'past_indx'],
                                                axis=1)
            data4_tickerhead = data4_tickerhead[data_columns]

        data4_tickerhead.to_pickle(file_name)
    finally:
        # Only close a connection that was opened on the caller's behalf.
        if 'con' not in kwargs:
            con.close()


def _expand_contract_4tickerhead(contract_data, full_dates):
    """Re-index one contract on ``full_dates`` and add its change columns."""
    settle_dates = contract_data['settle_date']
    contract_full_dates = full_dates[(full_dates >= settle_dates.min()) &
                                     (full_dates <= settle_dates.max())]
    full_date_frame = pd.DataFrame(contract_full_dates, columns=['settle_date'])
    merged = pd.merge(full_date_frame, contract_data,
                      on='settle_date', how='left')

    # Days without a quote have no identifiers after the left merge; fill
    # them from the contract's first row.
    first_label = contract_data.index[0]
    for column in ('ticker', 'ticker_head', 'ticker_month',
                   'ticker_year', 'cont_indx'):
        merged[column] = contract_data[column][first_label]

    close = merged['close_price']
    next_close = close.shift(-1)

    # Forward changes are measured from the next day's close.
    merged['change1'] = close.shift(-2) - next_close
    merged['change1_instant'] = next_close - close
    merged['high1_instant'] = merged['high_price'].shift(-1)
    merged['low1_instant'] = merged['low_price'].shift(-1)
    merged['change2'] = close.shift(-3) - next_close
    merged['change5'] = close.shift(-6) - next_close
    merged['change10'] = close.shift(-11) - next_close
    merged['change20'] = close.shift(-21) - next_close
    # Backward changes are measured up to today's close.
    merged['change_5'] = close - close.shift(5)
    merged['change_1'] = close - close.shift(1)
    return merged
def _processOption1Input(self):
    '''
    Validates input for option 1.

    Reads the symbol, start-date and end-date entry widgets and stores the
    validated values on the instance:

    * ``_symbols``     -- the raw, upper-cased symbol string
    * ``_symbolList``  -- the individual symbols (empty entries dropped)
    * ``_wantedDates`` -- every US business day between the two dates, as
      ``YYYY-MM-DD`` strings
    * ``_startDate`` / ``_endDate`` -- first and last entry of
      ``_wantedDates``

    Raises ValueError when a field is empty, there are more than four
    symbols, a symbol is not alphabetical, a date is not a valid
    ``YYYY-MM-DD`` calendar date, the end date lies in the future or before
    the start date, or no business day falls inside the range.
    '''
    self._symbols = self._symbolInput1.get().upper()
    self._startDate = self._startDateEntry.get()
    self._endDate = self._endDateEntry.get()

    if not (self._symbols and self._startDate and self._endDate):
        raise ValueError(
            "All three fields need to be filled before a graph " +
            "can be created.")

    # Split the symbols and drop every empty entry, so stray or repeated
    # commas are ignored.
    stripped = (symbol.strip() for symbol in self._symbols.split(','))
    self._symbolList = [symbol for symbol in stripped if symbol]

    # No more than 4 symbols, and all of them alphabetical.
    limit = 4
    if len(self._symbolList) > limit:
        raise ValueError("There may be no more than " + str(limit) +
                         " symbols.")
    if not all(symbol.isalpha() for symbol in self._symbolList):
        raise ValueError("Symbols must be alphabetical characters.")

    # Date checks:
    # 1. Correct format
    # 2. A date in the calendar
    # 3. The end date is not in the future and not before the start date
    endDateCheck = self._endDate.split('-')
    startDateCheck = self._startDate.split('-')
    if len(endDateCheck) != 3 or len(startDateCheck) != 3:
        raise ValueError('Invalid date format!')

    # int() raises ValueError for non-numeric parts, and datetime() raises
    # ValueError for dates that do not exist.
    endDateList = [int(num) for num in endDateCheck]
    startDateList = [int(num) for num in startDateCheck]

    end = datetime.datetime(*endDateList)
    if end > datetime.datetime.now():
        raise ValueError(
            "The end date needs to either occur today or before today.")

    start = datetime.datetime(*startDateList)
    if end < start:
        raise ValueError(
            "The end date needs to occur after the start date.")

    # Roll both dates onto weekdays. The parsed dates are used rather than
    # the raw strings so that input such as "2019-1-5" is accepted too.
    self._startDate = str(
        np.busday_offset(np.datetime64(start.date(), 'D'), 0,
                         roll='forward'))
    self._endDate = str(
        np.busday_offset(np.datetime64(end.date(), 'D'), 0,
                         roll='backward'))

    # Every US business day (weekends and federal holidays excluded) in the
    # range, for use in DataRetriever.
    us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    wanted = pd.date_range(start=self._startDate, end=self._endDate,
                           freq=us_bd)
    if len(wanted) == 0:
        # E.g. both dates fall on the same weekend or holiday.
        raise ValueError(
            "There are no business days between the start date and the "
            "end date.")

    self._wantedDates = list(wanted.strftime('%Y-%m-%d'))
    self._startDate = self._wantedDates[0]
    self._endDate = self._wantedDates[-1]
def day(self):
    """Return the offset describing the days on which our exchange is open.

    Every day of the week is part of the week mask.
    """
    return CustomBusinessDay(weekmask="Mon Tue Wed Thu Fri Sat Sun")
pd.date_range(start="7/1/2017", end="7/21/2017", freq='B') # In[4]: from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay # In[5]: usb = CustomBusinessDay(calendar=USFederalHolidayCalendar()) # In[6]: rng = pd.date_range(start="7/1/2017", end="7/21/2017", freq=usb) # In[7]: df.set_index(rng, inplace=True) df
def create_data():
    """Create the pickle data.

    Returns a nested dict holding one representative object per kind of
    pandas container (series, frames, indexes, scalars, multi-indexes,
    sparse objects, categoricals, timestamps and date offsets).
    """
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M"),
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
        "range": RangeIndex(10),
    }
    # interval_range is only imported and used from version 0.21 onwards.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    reg2_levels = [
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    ]
    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(zip(*reg2_levels)),
            names=["first", "second"],
        ),
    }

    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(
            np.arange(10).astype(np.int64),
            index=date_range("20130101", periods=10),
        ),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"],
            ),
        ),
        # The label "A" is duplicated on purpose.
        "dup": Series(
            np.arange(5).astype(np.float64),
            index=["A", "B", "C", "D", "A"],
        ),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }

    # Mixed-dtype frame whose columns are relabelled so "A" occurs twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame = {
        "float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
        "int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
        "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64),
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64),
            columns=["A", "B", "A"],
        ),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
        "freq": Timestamp("2011-01-01", freq="D"),
        "both": Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M"),
    }

    # One instance of each date-offset class.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries(),
        },
        "sp_frame": {"float": _create_sp_frame()},
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
class FrenchBusinessCalendar(AbstractHolidayCalendar): rules = [ Holiday('New Years Day', month=1, day=1), EasterMonday, Holiday('Labour Day', month=5, day=1), Holiday('Victory in Europe Day', month=5, day=8), Holiday('Ascension Day', month=1, day=1, offset=[Easter(), Day(39)]), Holiday('Bastille Day', month=7, day=14), Holiday('Assumption of Mary to Heaven', month=8, day=15), Holiday('All Saints Day', month=11, day=1), Holiday('Armistice Day', month=11, day=11), Holiday('Christmas Day', month=12, day=25) ] French_BD = CustomBusinessDay(calendar=FrenchBusinessCalendar()) s = pd.date_range('2016-12-29', end='2021-01-03', freq=French_BD) df = pd.DataFrame(s, columns=['Date']) # Define fares depending on day time normal_dict = { 'day_first_hour_fare': '40', 'night_first_hour_fare': '49.50', 'day_subsequent_hour_fare': '32', 'night_subsequent_hour_fare': '37.50' } holiday_dict = { 'day_first_hour_fare': '49.50', 'night_first_hour_fare': '57', 'day_subsequent_hour_fare': '37.50',
def create_data():
    """Create the pickle data.

    Returns a dict with one entry per kind of pandas object (series, frames,
    indexes, scalars, multi-indexes, sparse objects, categoricals,
    timestamps and date offsets); each entry maps a short label to a sample
    object.
    """
    data = dict(
        A=[0.0, 1.0, 2.0, 3.0, np.nan],
        B=[0, 1, 0, 1, 0],
        C=["foo1", "foo2", "foo3", "foo4", "foo5"],
        D=date_range("1/1/2009", periods=5),
        E=[0.0, 1, Timestamp("20100101"), "foo", 2.0],
    )

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
        range=RangeIndex(10),
        interval=interval_range(0, periods=10),
    )

    reg2_tuples = tuple(
        zip(*[
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]))
    mi = dict(reg2=MultiIndex.from_tuples(reg2_tuples, names=["first", "second"]))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"]),
        ),
        # The label "A" is repeated deliberately.
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Frame of mixed dtypes, relabelled so that column "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame = dict(
        float=DataFrame({"A": series["float"], "B": series["float"] + 1}),
        int=DataFrame({"A": series["int"], "B": series["int"] + 1}),
        mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64),
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
        freq=Timestamp("2011-01-01", freq="D"),
        both=Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M"),
    )

    # One instance of each date-offset class.
    off = dict(
        DateOffset=DateOffset(years=1),
        DateOffset_h_ns=DateOffset(hour=6, nanoseconds=5824),
        BusinessDay=BusinessDay(offset=timedelta(seconds=9)),
        BusinessHour=BusinessHour(normalize=True, n=6, end="15:14"),
        CustomBusinessDay=CustomBusinessDay(weekmask="Mon Fri"),
        SemiMonthBegin=SemiMonthBegin(day_of_month=9),
        SemiMonthEnd=SemiMonthEnd(day_of_month=24),
        MonthBegin=MonthBegin(1),
        MonthEnd=MonthEnd(1),
        QuarterBegin=QuarterBegin(1),
        QuarterEnd=QuarterEnd(1),
        Day=Day(1),
        YearBegin=YearBegin(1),
        YearEnd=YearEnd(1),
        Week=Week(1),
        Week_Tues=Week(2, normalize=False, weekday=1),
        WeekOfMonth=WeekOfMonth(week=3, weekday=4),
        LastWeekOfMonth=LastWeekOfMonth(n=1, weekday=3),
        FY5253=FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        Easter=Easter(),
        Hour=Hour(1),
        Minute=Minute(1),
    )

    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
def day(self):
    """Return a business-day offset whose week mask covers all seven days."""
    every_day = 'Mon Tue Wed Thu Fri Sat Sun'
    return CustomBusinessDay(weekmask=every_day)
def get_dates(start_date, num_days):
    """Return ``num_days`` consecutive US business days as a NumPy array.

    The days are generated from ``start_date`` with a business-day frequency
    that skips weekends and US federal holidays, so the first entry is
    ``start_date`` itself only when that date is a business day.

    Args:
        start_date: Start of the range, as accepted by ``pd.date_range``.
        num_days: Number of business days to generate.

    Returns:
        A ``numpy.ndarray`` of ``datetime64`` values.
    """
    federal_holidays = USFederalHolidayCalendar()
    business_day = CustomBusinessDay(calendar=federal_holidays)
    day_index = pd.date_range(start_date, periods=num_days, freq=business_day)
    as_datetimes = day_index.to_pydatetime()
    return np.array(as_datetimes, dtype=np.datetime64)
def trade_date_offset(self):
    """
    Trading-day offset.

    The holidays are taken from ``self.holiday_calendar`` (converted with
    ``tolist()``) and passed to ``CustomBusinessDay``.

    :return: a ``CustomBusinessDay`` offset that skips those holidays
    """
    holiday_list = self.holiday_calendar.tolist()
    return CustomBusinessDay(holidays=holiday_list)
def day(self):
    """Return the ``CustomBusinessDay`` offset configured by this object.

    ``self.adhoc_holidays`` is passed as ``holidays``,
    ``self.regular_holidays`` as ``calendar`` and ``self.weekmask`` as
    ``weekmask``.
    """
    offset_options = {
        'holidays': self.adhoc_holidays,
        'calendar': self.regular_holidays,
        'weekmask': self.weekmask,
    }
    return CustomBusinessDay(**offset_options)
#with size 10 & 75 df10=df2[0:10] df75=df2[0:75] # #from pandas.tseries.holiday import USFederalHolidayCalendar # #indias celender is not in pandas we can create holidays of it # #In india sunday is holiday so i use mask sunday(we can do better) # #let say 2 july also an holiday(assume) # # so all sundays and selected holidays we can handle in this way # In[311]: from pandas.tseries.offsets import CustomBusinessDay hh=CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat',holidays=['2017-07-02']) # In[312]: r1=pd.date_range(start="6/1/2015",end="6/11/2015",freq=hh) df10.set_index(r1,inplace=True) df10.head() # In[313]: df10.plot()
# Score each fitted model on its *_test arrays.
accuracy_svr_lin = clf_svr_lin.score(X_svr_lin_test, y_svr_lin_test)
accuracy_svr_rbf = clf_svr_rbf.score(X_svr_rbf_test, y_svr_rbf_test)
accuracy_lin = clf_lin.score(X_lin_test, y_lin_test)

# Predicting the data
forecast_predicted_svr_lin = clf_svr_lin.predict(X_lately_svr_lin)
forecast_predicted_svr_rbf = clf_svr_rbf.predict(X_lately_svr_rbf)
forecast_predicted_lin = clf_lin.predict(X_lately_lin)

# Creating the forecast columns, filled with NaN for the existing rows
df_lin['Forecast_lin'] = np.nan
df_svr_lin['Forecast_svr_lin'] = np.nan
df_svr_rbf['Forecast_svr_rbf'] = np.nan

# One US business day: skips weekends and US federal holidays
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Last date of each dataset; forecast rows are appended after it
last_date_lin = df_lin.iloc[-1].name
last_date_svr_lin = df_svr_lin.iloc[-1].name
last_date_svr_rbf = df_svr_rbf.iloc[-1].name

# Step forecast dates by one business day. The previous code added the
# string "1" to the date and never used bday_us, which either raises a
# TypeError (Timestamp labels) or produces malformed labels (string labels).
# NOTE(review): assumes the frames are indexed by datetime-like labels
# (e.g. a DatetimeIndex) -- confirm against the code that loads them.
one_day = bday_us
next_bday_lin = last_date_lin + one_day
next_bday_svr_lin = last_date_svr_lin + one_day
next_bday_svr_rbf = last_date_svr_rbf + one_day

# Append every predicted price at the next business date: all columns are
# NaN except the last one, which receives the forecast value.
for i in forecast_predicted_svr_lin:
    next_date_svr_lin = next_bday_svr_lin
    next_bday_svr_lin += one_day
    df_svr_lin.loc[next_date_svr_lin] = [np.nan] * (len(df_svr_lin.columns) - 1) + [i]
def test_get_clean_factor_and_forward_returns_6(self):
    """
    Check get_clean_factor_and_forward_returns for a daily factor whose
    dates follow a business-day calendar with custom holidays.
    """
    assets = ['A', 'B', 'C', 'D', 'E', 'F']
    groups_by_asset = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2}

    # 21 days = 18 + 3 fwd returns
    growth_bases = (1.10, 0.50, 3.00, 0.90, 0.50, 1.00)
    price_rows = [[base**day for base in growth_bases]
                  for day in range(1, 22)]

    # 18 days
    factor_rows = [
        [3, 4, 2, 1, nan, nan],
        [3, nan, nan, 1, 4, 2],
        [3, 4, 2, 1, nan, nan],
    ] * 6

    first_day = '2017-1-12'
    last_factor_day = '2017-2-10'
    last_price_day = '2017-2-15'  # 3D (business day) fwd returns

    holidays = [
        Timestamp(day)
        for day in ('2017-1-13', '2017-1-18', '2017-1-30', '2017-2-7')
    ]

    # Define the freq object first and generate both indexes from it;
    # removing the holidays afterwards with `drop` would lose the freq.
    trading_day = CustomBusinessDay(normalize=True, holidays=holidays)

    price_index = date_range(start=first_day, end=last_price_day,
                             freq=trading_day, name='date')
    prices = DataFrame(data=price_rows, index=price_index, columns=assets)

    factor_index = date_range(start=first_day, end=last_factor_day,
                              freq=trading_day, name='date')
    factor = DataFrame(data=factor_rows, index=factor_index,
                       columns=assets).stack()

    result = get_clean_factor_and_forward_returns(
        factor,
        prices,
        groupby=groups_by_asset,
        quantiles=4,
        periods=(1, 2, 3))

    # 18 days
    expected_rows = [
        [0.1, 0.21, 0.331, 3.0, 1, 3],
        [-0.5, -0.75, -0.875, 4.0, 2, 4],
        [2.0, 8.00, 26.000, 2.0, 1, 2],
        [-0.1, -0.19, -0.271, 1.0, 2, 1],
        [0.1, 0.21, 0.331, 3.0, 1, 3],
        [-0.1, -0.19, -0.271, 1.0, 2, 1],
        [-0.5, -0.75, -0.875, 4.0, 1, 4],
        [0.0, 0.00, 0.000, 2.0, 2, 2],
        [0.1, 0.21, 0.331, 3.0, 1, 3],
        [-0.5, -0.75, -0.875, 4.0, 2, 4],
        [2.0, 8.00, 26.000, 2.0, 1, 2],
        [-0.1, -0.19, -0.271, 1.0, 2, 1],
    ] * 6
    expected = DataFrame(
        data=expected_rows,
        index=factor.index.rename(['date', 'asset']),
        columns=['1D', '2D', '3D', 'factor', 'group', 'factor_quantile'],
    )
    expected['group'] = expected['group'].astype('category')

    assert_frame_equal(result, expected)

    # None of the holidays may appear among the resulting dates.
    assert not result.index.levels[0].isin(holidays).any()