def day(self):
    """Return the business-day offset for this calendar.

    The offset counts Monday through Friday and excludes both
    ``self.adhoc_holidays`` and the dates from ``self.regular_holidays``.
    """
    offset_options = {
        "holidays": self.adhoc_holidays,
        "calendar": self.regular_holidays,
        "weekmask": "Mon Tue Wed Thu Fri",
    }
    return CustomBusinessDay(**offset_options)
def _get_custom_bd(exchange):
    """Build a ``CustomBusinessDay`` offset for ``exchange``.

    The offset's calendar is whatever
    ``GsCalendar.get(exchange).business_day_calendar()`` returns.
    """
    # Imported lazily, inside the function, as in the original.
    from pandas.tseries.offsets import CustomBusinessDay

    return CustomBusinessDay(
        calendar=GsCalendar.get(exchange).business_day_calendar()
    )
def day(self):
    """Return the business-day offset for this calendar.

    Working days come from ``self.weekmask``; ``self.adhoc_holidays`` and
    the dates from ``self.regular_holidays`` are excluded.
    """
    offset_options = {
        "holidays": self.adhoc_holidays,
        "calendar": self.regular_holidays,
        "weekmask": self.weekmask,
    }
    return CustomBusinessDay(**offset_options)
day=1, offset=[Easter(), Day(-47)]), # Sexta feira Santa(GoodFriday) GoodFriday, Holiday('Tiradentes', month=4, day=21), Holiday('Dia do Trabalho', month=5, day=1), # Corpus Christi (60 dias após a Páscoa, ou 62 dias após a Sexta Feira Santa) Holiday('Corpus Christi', month=1, day=1, offset=[Easter(), Day(60)]), Holiday('Independência do Brasil', month=9, day=7), Holiday('Nossa Senhora Aparecida - Padroeira do Brasil', month=10, day=12), Holiday('Finados', month=11, day=2), Holiday('Proclamação da República', month=11, day=15), Holiday('Natal', month=12, day=25) ] ################# Exemplos ######################### br_feriados = CustomBusinessDay(calendar=Feriados_Brasil()) inst = Feriados_Brasil() # Gerar lista de feriados de 2001 até final de 2079 (mesmo período disponibilizado pela Anbima) lista_feriados = inst.holidays(dt.datetime(2000, 12, 31), dt.datetime(2079, 12, 31)) # Se uma data não é dia útil, por exemplo 01/01/2010, determinar o próximo dia útil pŕoximo_dia_util = date(2010, 1, 1) + br_feriados
def bday(self, d):
    """Return an offset of ``d`` business days using ``self`` as the holiday calendar."""
    single_business_day = CustomBusinessDay(calendar=self)
    return d * single_business_day
import pandas as pd from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay import numpy as np from matplotlib import pyplot as pt df = pd.read_csv( r"C:\Users\Ritish Adhikari\Desktop\CSV File\aapl_no_dates.csv") rows, columns = df.shape #print("Printing the original DF Dataframe") #print(df) print() print( "------------Putting a date range to the DF Dataframe with US holiday list and printing the dataframe with date time index---------" ) USB = CustomBusinessDay(calendar=USFederalHolidayCalendar()) dt = pd.date_range(start='2018-07-01', periods=rows, freq=USB) df.set_index(dt, inplace=True) print(df) print() print( "----------------------Printing Indian Calendar Now-------------------------------" ) from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday class MyBirthdayCalendar(AbstractHolidayCalendar): rules = [
from strategy import Strategy from backtest import Bt from statistic import indicator, index_accumulate_return import pandas as pd from typing import Callable import matplotlib.pyplot as plt from pandas.tseries.offsets import BDay from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay from data import Data US_BUSINESS_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar()) class Portfolio: def __init__(self, start_date, end_date): self.data = Data().data self.start_date = start_date self.end_date = end_date def run(self, hold_days: int, strategy: Callable): assert isinstance(hold_days, int), 'the type of hold_dates should be int.' signal_dict = self._strategy_ticker_signal(strategy) # {ticker: } ret_output = pd.Series() log_df = pd.DataFrame()
pd.date_range(start="7/1/2017", end="7/21/2017", freq='B') # In[4]: from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.offsets import CustomBusinessDay # In[5]: usb = CustomBusinessDay(calendar=USFederalHolidayCalendar()) # In[6]: rng = pd.date_range(start="7/1/2017", end="7/21/2017", freq=usb) # In[7]: df.set_index(rng, inplace=True) df
class FrenchBusinessCalendar(AbstractHolidayCalendar): rules = [ Holiday('New Years Day', month=1, day=1), EasterMonday, Holiday('Labour Day', month=5, day=1), Holiday('Victory in Europe Day', month=5, day=8), Holiday('Ascension Day', month=1, day=1, offset=[Easter(), Day(39)]), Holiday('Bastille Day', month=7, day=14), Holiday('Assumption of Mary to Heaven', month=8, day=15), Holiday('All Saints Day', month=11, day=1), Holiday('Armistice Day', month=11, day=11), Holiday('Christmas Day', month=12, day=25) ] French_BD = CustomBusinessDay(calendar=FrenchBusinessCalendar()) s = pd.date_range('2016-12-29', end='2021-01-03', freq=French_BD) df = pd.DataFrame(s, columns=['Date']) # Define fares depending on day time normal_dict = { 'day_first_hour_fare': '40', 'night_first_hour_fare': '49.50', 'day_subsequent_hour_fare': '32', 'night_subsequent_hour_fare': '37.50' } holiday_dict = { 'day_first_hour_fare': '49.50', 'night_first_hour_fare': '57', 'day_subsequent_hour_fare': '37.50',
import numpy as np import pandas as pd import patsy from pandas.tseries.offsets import CustomBusinessDay from pandas.tseries.holiday import USFederalHolidayCalendar as calendar import scipy import pytz import re utils.setup_logging() _log = logging.getLogger(__name__) UTC_TZ = pytz.timezone('UTC') WORKING_DIR = os.getcwd() __version__ = 0.1 HOLIDAYS = pd.to_datetime(CustomBusinessDay(calendar=calendar()).holidays) def is_weekend_holiday(start, end, tz): if start.astimezone(tz).date() in HOLIDAYS and \ end.astimezone(tz).date() in HOLIDAYS: return True if start.astimezone(tz).weekday() > 4 and \ end.astimezone(tz).weekday() > 4: return True return False def sort_list(lst): sorted_list = [] for item in lst:
def _processOption1Input(self): ''' Validates input for option 1. ''' #print("_procOp1") # debug self._symbols = self._symbolInput1.get().upper() self._startDate = self._startDateEntry.get() self._endDate = self._endDateEntry.get() endDateCheck = tuple(self._endDate.split('-')) startDateCheck = tuple(self._startDate.split('-')) if (self._symbols and self._startDate and self._endDate): # Convert symbols to tuple self._symbolList = [ symbol.strip() for symbol in self._symbols.split(',') ] if "" in self._symbolList: self._symbolList.remove("") #print(self._symbolList) # No more than 4 symbols. limit = 4 if len(self._symbolList) <= limit: for symbol in self._symbolList: if not symbol.isalpha(): raise ValueError( "Symbols must be alphabetical characters.") else: raise ValueError("There may be no more than " + str(limit) + " symbols.") # Must all be alphabetical chars. # date checks # 1. Correct format # 2. An date in the calendar # 3. The date ends before the start date if len(endDateCheck) is 3 and len( startDateCheck) is 3: # Correct format? # Are they all numbers? Check by converting to int #print(endDateCheck, startDateCheck) endDateList = [int(num) for num in endDateCheck] startDateList = [int(num) for num in startDateCheck] present = datetime.datetime.now() # Now check to see that the end date is later than the start date if not (datetime.datetime(*endDateList) <= datetime.datetime.now()): raise ValueError( "The end date needs to either occur today or before today." ) # and the endDate is no further than the current day. elif not (datetime.datetime(*endDateList) >= datetime.datetime(*startDateList)): raise ValueError( "The end date needs to occur after the start date.") else: # So if those two errors aren't raised, modify the start date and end date # to ensure that both are business days and not weekends nor holidays. # If they are, then roll them to the nearest business day. 
self._startDate = str( np.busday_offset(self._startDate, 0, roll='forward')) self._endDate = str( np.busday_offset(self._endDate, 0, roll='backward')) us_bd = CustomBusinessDay( calendar=USFederalHolidayCalendar()) # also create a list of the dates in between the start and the end date to use in DataRetriever. self._wantedDates = list( pd.DatetimeIndex(start=self._startDate, end=self._endDate, freq=us_bd).format()) self._startDate = self._wantedDates[0] self._endDate = self._wantedDates[-1] else: raise ValueError('Invalid date format!') else: #pass raise ValueError( "All three fields need to be filled before a graph " + "can be created.")
plt.legend() plt.show() ''' # Creating a DataFrame containing the timeseries data along with the pca features data.iloc[:, 1:3] = X[:, 0:2] ts = data.iloc[:, 0:3] ts.columns = ['InvoiceDate', 'PCA1', 'PCA2'] ts.index = ts.InvoiceDate ts = ts.iloc[:, 1:3] #ts1 = ts.drop_duplicates() UBO = ts from pandas.tseries.offsets import CustomBusinessDay week_mask = 'Mon' bo1 = CustomBusinessDay(weekmask=week_mask) BOM = ts.asfreq(freq=bo1, method='ffill') # Applying PCA from sklearn.decomposition import PCA pca1 = PCA( n_components=1 ) # first run with the n_components=None , then after seeing the explained variance, choose the number BOM = pca1.fit_transform(BOM) explained_variance = pca1.explained_variance_ratio_ plt.plot(BOM, label='Monday') plt.title('Biased Observer - Monday 2') plt.legend('M') plt.figure('1') from pandas.tseries.offsets import CustomBusinessDay week_mask = 'Tue'
#HOLIDAYS import pandas as pd import numpy as np from datetime import datetime from pandas.tseries.holiday import USFederalHolidayCalendar from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday from pandas.tseries.offsets import CustomBusinessDay df = pd.read_csv('appl_no_dates.csv') print(df) rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq='B') print(rng) us_cal = CustomBusinessDay(calendar=USFederalHolidayCalendar()) rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq=us_cal) print(rng) df.set_index(rng, inplace=True) print(df) class myCalendar(AbstractHolidayCalendar): rules = [ Holiday('My Birth Day', month=4, day=12) #, observance=nearest_workday), ] my_bday = CustomBusinessDay(calendar=myCalendar()) print(pd.date_range('4/1/2017','4/30/2017',freq=my_bday)) print(pd.date_range(start='4/1/2017', end='4/30/2017',freq=my_bday))
def get_holidays(dt_start, dt_end):
    """Return the ``BrazilHolidays`` calendar's holidays from ``dt_start`` to ``dt_end``.

    Args:
        dt_start: start of the period, passed to ``BrazilHolidays.holidays``.
        dt_end: end of the period, passed to ``BrazilHolidays.holidays``.

    Returns:
        Whatever ``BrazilHolidays().holidays(dt_start, dt_end)`` returns.
    """
    # The original also built a CustomBusinessDay from a second
    # BrazilHolidays instance and never used it; that dead work is dropped.
    return BrazilHolidays().holidays(dt_start, dt_end)
import MySQLdb as mariadb
from pymongo import MongoClient
from sqlalchemy import create_engine, Table, Column, MetaData
from sqlalchemy_utils import database_exists, create_database
from contextlib import contextmanager
from data import *

# Duplicated from utils
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.offsets import CustomBusinessMonthBegin

# One shared holiday calendar instance feeds both offsets below.
US_Federal_Calendar = USFederalHolidayCalendar()
bmth_us = CustomBusinessMonthBegin(calendar=US_Federal_Calendar)
bday_us = CustomBusinessDay(calendar=US_Federal_Calendar)

# Configure the root logger and raise its threshold to WARN.
logging.basicConfig()
logging.getLogger().setLevel(logging.WARN)

# String-to-string lookup; keys look like pandas/numpy dtype names and values
# like SQL column types -- how it is consumed is not visible here.
Dtype_Mapping = {
    'object': 'TEXT',
    'int64': 'INT',
    'float64': 'FLOAT',
    'datetime64': 'DATETIME',
    'bool': 'TINYINT',
    'category': 'TEXT',
    'timedelta[ns]': 'TEXT'
}
def day(self):
    """Return a business-day offset in which all seven weekdays are working days."""
    all_week = 'Mon Tue Wed Thu Fri Sat Sun'
    return CustomBusinessDay(weekmask=all_week)
def create_data():
    """
    create the pickle data

    Builds one sample object of many pandas kinds (indexes, Series,
    DataFrames, Categoricals, Timestamps, date offsets, sparse objects) and
    returns them in a nested dict keyed by category name.
    """
    # Raw column values reused by several Series/DataFrames below.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    # interval_range is only imported and used when the running pandas
    # version is at least 0.21.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        # Index label "A" appears twice.
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Mixed-dtype frame whose column label "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        # Columns holding timestamps in different time zones.
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    # Categoricals with 7, 1000 and 10000 categories; the key names suggest
    # they target different integer code widths -- not verifiable from here.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance per offset class, several with non-default arguments.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The sparse entries come from helper functions defined elsewhere.
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
def trade_date_offset(self):
    """
    Trading-day offset.

    :return: a ``CustomBusinessDay`` whose holidays are the entries of
        ``self.holiday_calendar`` (converted with ``tolist()``)
    """
    non_trading_days = self.holiday_calendar.tolist()
    return CustomBusinessDay(holidays=non_trading_days)
def create_data():
    """create the pickle data

    Builds one sample object of many pandas kinds (indexes, Series,
    DataFrames, Categoricals, Timestamps, date offsets, sparse objects) and
    returns them in a nested dict keyed by category name.
    """
    # Raw column values reused by several Series/DataFrames below.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    mi = {
        "reg2":
        MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }

    series = {
        "float":
        Series(data["A"]),
        "int":
        Series(data["B"]),
        "mixed":
        Series(data["E"]),
        "ts":
        Series(np.arange(10).astype(np.int64),
               index=date_range("20130101", periods=10)),
        "mi":
        Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        # Index label "A" appears twice.
        "dup":
        Series(np.arange(5).astype(np.float64),
               index=["A", "B", "C", "D", "A"]),
        "cat":
        Series(Categorical(["foo", "bar", "baz"])),
        "dt":
        Series(date_range("20130101", periods=5)),
        "dt_tz":
        Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period":
        Series([Period("2000Q1")] * 5),
    }

    # Mixed-dtype frame whose column label "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float":
        DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int":
        DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed":
        DataFrame({k: data[k]
                   for k in ["A", "B", "C", "D"]}),
        "mi":
        DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup":
        DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                  columns=["A", "B", "A"]),
        "cat_onecol":
        DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float":
        DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup":
        mixed_dup_df,
        # Columns holding timestamps in different time zones.
        "dt_mixed_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    # Categoricals with 7, 1000 and 10000 categories; the key names suggest
    # they target different integer code widths -- not verifiable from here.
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance per offset class, several with non-default arguments.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The sparse entries come from helper functions defined elsewhere.
    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
#with size 10 & 75 df10=df2[0:10] df75=df2[0:75] # #from pandas.tseries.holiday import USFederalHolidayCalendar # #indias celender is not in pandas we can create holidays of it # #In india sunday is holiday so i use mask sunday(we can do better) # #let say 2 july also an holiday(assume) # # so all sundays and selected holidays we can handle in this way # In[311]: from pandas.tseries.offsets import CustomBusinessDay hh=CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat',holidays=['2017-07-02']) # In[312]: r1=pd.date_range(start="6/1/2015",end="6/11/2015",freq=hh) df10.set_index(r1,inplace=True) df10.head() # In[313]: df10.plot()
def get_dates(start_date, num_days):
    """Return ``num_days`` US federal business days as a ``datetime64`` array.

    The dates are generated by ``pd.date_range`` from ``start_date`` with a
    ``CustomBusinessDay`` frequency built on ``USFederalHolidayCalendar``.
    """
    federal_business_day = CustomBusinessDay(
        calendar=USFederalHolidayCalendar())
    as_py_datetimes = pd.date_range(
        start_date, periods=num_days,
        freq=federal_business_day).to_pydatetime()
    return np.array(as_py_datetimes, dtype=np.datetime64)
# New Years have difference observance method because the preceeding # Friday is year end and exchange is open Holiday('New Years Day', month=1, day=1, observance=sunday_to_monday), USMartinLutherKingJr, USPresidentsDay, USMemorialDay, Holiday('July 4th', month=7, day=4, observance=nearest_workday), USLaborDay, GoodFriday, USThanksgivingDay, Holiday('Christmas', month=12, day=25, observance=nearest_workday) ] # US business day us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar()) # US financial business day fi_bd = CustomBusinessDay(calendar=USFinancialHolidayCalendar()) fi_holidays = USFinancialHolidayCalendar().holidays _ = fi_holidays('19500101', '20500101') # Activate the holiday for better efficiency # US Exchange bussiness hour fi_bh = CustomBusinessHour(calendar=USFederalHolidayCalendar(), start='9:30', end='16:30') def weekday_distance(t1, t2, convention='forward'): """ Number of weekdays between t1 and t2: t2 - t1 Rolling forward convention: from 0 am+ to 0 am+
def create_data():
    """
    create the pickle/msgpack data

    Builds one sample object of many pandas kinds (indexes, Series,
    DataFrames, Panels, Categoricals, Timestamps, date offsets, sparse
    objects) and returns them in a nested dict keyed by category name.
    """

    # Raw column values reused by several Series/DataFrames below.
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        # Index label u'A' appears twice.
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    # Mixed-dtype frame whose column label u'A' appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        # Columns holding timestamps in different time zones.
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    # Panel construction runs with warnings recorded instead of shown.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        # Both items end up with the same label.
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5,
                                                     2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    # Categoricals with 7, 1000 and 10000 categories; the key names suggest
    # they target different integer code widths -- not verifiable from here.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # pandas versions before 0.19.2 get the `offset=` keyword; later ones
    # get `freq=`.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One instance per offset class, several with non-default arguments.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The sparse entries come from helper functions defined elsewhere.
    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def generate_and_update_futures_data_file_4tickerhead(**kwargs):
    """
    Build or refresh the pickled futures-price frame for one ticker head.

    Prices are fetched through ``gfp.get_futures_price_4ticker``; rows flagged
    in ``dirty_data_points`` are dropped; every contract is re-indexed onto a
    common business-day grid; forward and backward price-change columns are
    added; and the result is written to
    ``presaved_futures_data_folder/<ticker_head>.pkl``.  When that pickle
    already exists only recent data is fetched and merged into the old frame.

    Keyword Args:
        ticker_head: ticker head to process (required).
        con: optional, already-open database connection.  When it is absent
            the connection obtained here is closed before returning.
        Any other keyword is forwarded to ``msu.get_my_sql_connection``.

    Returns:
        None.  The result is written to the pickle file.
    """
    ticker_head = kwargs['ticker_head']
    pickle_path = presaved_futures_data_folder + '/' + ticker_head + '.pkl'

    con = msu.get_my_sql_connection(**kwargs)

    # Decide once whether this is an incremental update; the file is only
    # written at the very end, so the answer cannot change in between.
    has_old_data = os.path.isfile(pickle_path)

    if has_old_data:
        old_data = pd.read_pickle(pickle_path)
        last_available_date = int(
            old_data['settle_date'].max().to_pydatetime().strftime('%Y%m%d'))
        # Re-fetch a window around the last stored date (shift of 60 via
        # cu.doubledate_shift; presumably 60 days back -- TODO confirm).
        date_from = cu.doubledate_shift(last_available_date, 60)
        data4_tickerhead = gfp.get_futures_price_4ticker(
            ticker_head=ticker_head, date_from=date_from, con=con)
    else:
        data4_tickerhead = gfp.get_futures_price_4ticker(
            ticker_head=ticker_head, con=con)

    # Left-join the known-bad points and drop every row flagged `discard`.
    # Unmatched rows carry NaN there, and NaN != True keeps them.
    data4_tickerhead = pd.merge(data4_tickerhead,
                                dirty_data_points,
                                on=['settle_date', 'ticker'],
                                how='left')
    data4_tickerhead = data4_tickerhead[data4_tickerhead['discard'] != True]
    # axis must be passed by keyword: pandas 2.0 removed the positional form.
    data4_tickerhead = data4_tickerhead.drop('discard', axis=1)

    # Coerce the price columns to float, mapping None to NaN.
    for price_column in ('close_price', 'open_price', 'high_price',
                         'low_price'):
        data4_tickerhead[price_column] = [
            float(x) if x is not None else float('NaN')
            for x in data4_tickerhead[price_column].values
        ]

    # Sortable contract key: year * 100 + month.
    data4_tickerhead['cont_indx'] = 100 * data4_tickerhead[
        'ticker_year'] + data4_tickerhead['ticker_month']

    unique_cont_indx_list = np.sort(data4_tickerhead['cont_indx'].unique())

    # NOTE(review): the calendar is always the 'CL' one, regardless of
    # ticker_head -- kept as in the original; confirm this is intended.
    bday_us = CustomBusinessDay(calendar=exp.get_calendar_4ticker_head('CL'))
    full_dates = pd.date_range(start=data4_tickerhead['settle_date'].min(),
                               end=data4_tickerhead['settle_date'].max(),
                               freq=bday_us)

    merged_dataframe_list = []

    for cont_indx in unique_cont_indx_list:
        contract_data = data4_tickerhead[data4_tickerhead['cont_indx'] ==
                                         cont_indx]

        # Business days spanning this contract's first to last settle date.
        contract_full_dates = full_dates[
            (full_dates >= contract_data['settle_date'].min())
            & (full_dates <= contract_data['settle_date'].max())]
        full_date_frame = pd.DataFrame(contract_full_dates,
                                       columns=['settle_date'])
        merged = pd.merge(full_date_frame,
                          contract_data,
                          on='settle_date',
                          how='left')

        # Dates missing from the data came out of the merge as NaN; refill
        # the contract identifiers from the first real row.
        first_row = contract_data.index[0]
        for id_column in ('ticker', 'ticker_head', 'ticker_month',
                          'ticker_year', 'cont_indx'):
            merged[id_column] = contract_data[id_column][first_row]

        close = merged['close_price']

        # Forward changes are measured from the NEXT row's close (shift(-1)).
        merged['change1'] = close.shift(-2) - close.shift(-1)
        merged['change1_instant'] = close.shift(-1) - close
        merged['high1_instant'] = merged['high_price'].shift(-1)
        merged['low1_instant'] = merged['low_price'].shift(-1)
        merged['change2'] = close.shift(-3) - close.shift(-1)
        merged['change5'] = close.shift(-6) - close.shift(-1)
        merged['change10'] = close.shift(-11) - close.shift(-1)
        merged['change20'] = close.shift(-21) - close.shift(-1)
        # Backward-looking changes.
        merged['change_5'] = close - close.shift(5)
        merged['change_1'] = close - close.shift(1)

        merged_dataframe_list.append(merged)

    data4_tickerhead = pd.concat(merged_dataframe_list)

    if has_old_data:
        # past_indx: 1 where the 5-row backward change is finite.
        data4_tickerhead['past_indx'] = [
            1 if np.isfinite(x) else 0
            for x in data4_tickerhead['change_5'].values
        ]
        clean_data = data4_tickerhead
        clean_data['frame_indx'] = 1  # marks freshly fetched rows

        # Capture the stored column layout before adding helper columns.
        data_columns = old_data.columns
        old_data['frame_indx'] = 0  # marks rows from the existing pickle
        old_data['past_indx'] = [
            1 if np.isfinite(x) else 0 for x in old_data['change_5'].values
        ]

        merged_data = pd.concat([old_data, clean_data],
                                ignore_index=True,
                                sort=True)
        # For each (contract, date) prefer rows with a finite change_5, then
        # fresh rows over stored ones; drop_duplicates keeps that first row.
        merged_data.sort_values(
            ['cont_indx', 'settle_date', 'past_indx', 'frame_indx'],
            ascending=[True, True, False, False],
            inplace=True)
        merged_data.drop_duplicates(subset=['settle_date', 'cont_indx'],
                                    keep='first',
                                    inplace=True)
        data4_tickerhead = merged_data.drop(['frame_indx', 'past_indx'],
                                            axis=1,
                                            inplace=False)
        # Restore the stored column order.
        data4_tickerhead = data4_tickerhead[data_columns]

    data4_tickerhead.to_pickle(pickle_path)

    # Only close the connection if the caller did not supply one.
    if 'con' not in kwargs.keys():
        con.close()
def update_options_price_database_from_cme_files_4ticker(**kwargs):
    """
    Load one ticker's CME option settlements into ``daily_option_price``.

    Keyword Args:
        ticker: Contract ticker (required); its specs are resolved through
            ``cmi.get_contract_specs``.
        settle_date: Settle date as a YYYYMMDD number. Defaults to today's
            date when omitted.
        expiration_date: Option expiration date. When omitted it is taken
            from ``exp.get_options_expiration(ticker).date()``.
        cal_dte: Calendar days to expiration. When omitted it is computed as
            ``expiration_date - settle date``.
        tr_dte: Trading days to expiration. When omitted it is the number of
            business days (per the ticker head's calendar) from the settle
            date up to, but excluding, the expiration date.
        con: Presumably an already-open database connection (verify against
            ``msu.get_my_sql_connection``). When supplied, this function
            leaves it open; otherwise the connection obtained here is closed
            before returning.

    ``settle_date`` and ``report_date`` are written into ``kwargs`` before
    they are forwarded to ``msu.get_my_sql_connection`` and
    ``pco.process_cme_options_4ticker``.

    Returns:
        None. The function returns early, writing nothing, when the settle
        date is not a business day for the ticker head or when the CME file
        processing does not report success.
    """
    ticker = kwargs['ticker']
    print(ticker)

    contract_specs_output = cmi.get_contract_specs(ticker)
    ticker_head = contract_specs_output['ticker_head']
    ticker_month_num = contract_specs_output['ticker_month_num']
    ticker_year = contract_specs_output['ticker_year']

    if 'settle_date' in kwargs:
        settle_date = kwargs['settle_date']
    else:
        settle_date = int(time.strftime('%Y%m%d'))
        kwargs['settle_date'] = settle_date
    # The downstream processing is keyed on report_date; keep it in sync.
    kwargs['report_date'] = settle_date

    if not exp.is_business_day(double_date=settle_date,
                               reference_tickerhead=ticker_head):
        return

    if 'expiration_date' in kwargs:
        expiration_date = kwargs['expiration_date']
    else:
        expiration_date = exp.get_options_expiration(ticker).date()

    settle_datetime = cu.convert_doubledate_2datetime(settle_date)

    if 'cal_dte' in kwargs:
        cal_dte = kwargs['cal_dte']
    else:
        cal_dte = (expiration_date - settle_datetime.date()).days

    if 'tr_dte' in kwargs:
        tr_dte = kwargs['tr_dte']
    else:
        bday_us = CustomBusinessDay(
            calendar=exp.get_calendar_4ticker_head(ticker_head))
        dts = pd.date_range(start=settle_datetime,
                            end=expiration_date,
                            freq=bday_us)
        # The expiration day itself does not count as a remaining trading day.
        tr_dte = len(
            [x for x in dts if x.to_pydatetime().date() < expiration_date])

    data_vendor_id = 2
    now = dt.datetime.now()

    # Only close the connection if the caller did not hand one in.
    owns_connection = 'con' not in kwargs
    con = msu.get_my_sql_connection(**kwargs)

    try:
        process_output = pco.process_cme_options_4ticker(**kwargs)

        if not process_output['success']:
            return

        settle_frame = process_output['settle_frame']

        column_names = settle_frame.columns.tolist()
        option_type_indx = column_names.index('option_type')
        strike_indx = column_names.index('strike')
        settle_indx = column_names.index('settle')
        volume_indx = column_names.index('volume')
        interest_indx = column_names.index('interest')

        # NaN values are stored as SQL NULL.
        tuples = [
            tuple([
                data_vendor_id, ticker_head, ticker_month_num, ticker_year,
                ticker, x[option_type_indx], x[strike_indx],
                settle_datetime.date(), cal_dte, tr_dte, now, now,
                None if np.isnan(x[settle_indx]) else x[settle_indx],
                None if np.isnan(x[volume_indx]) else x[volume_indx],
                None if np.isnan(x[interest_indx]) else x[interest_indx]
            ]) for x in settle_frame.values
        ]

        column_str = "data_vendor_id, ticker_head, ticker_month, ticker_year, ticker, " \
                     " option_type, strike, price_date, cal_dte, tr_dte, " \
                     " created_date,last_updated_date, close_price, volume, open_interest"

        # One "%s" placeholder per column listed above.
        insert_str = ", ".join(["%s"] * len(column_str.split(',')))

        final_str = "REPLACE INTO daily_option_price (%s) VALUES (%s)" % (
            column_str, insert_str)

        msu.sql_execute_many_wrapper(final_str=final_str,
                                     tuples=tuples,
                                     con=con)
    finally:
        # Runs on success, early return and exceptions alike, so a
        # connection opened here is never leaked.
        if owns_connection:
            con.close()
Holiday("Independence Day", month=7, day=15, observance=nearest_workday), Holiday("Veterans Day", month=8, day=14, observance=nearest_workday), Holiday("New Years Day", month=8, day=15, observance=nearest_workday), Holiday("New Years Day", month=8, day=16, observance=nearest_workday), Holiday("'体育の日", month=10, day=14, observance=nearest_workday), Holiday("New Years Day", month=12, day=30, observance=nearest_workday), Holiday("Black Friday", month=12, day=31, observance=nearest_workday), ] # umc-workingday # #using CustomBusinessDay cal = CustomBusinessDay(calendar=myCalendar()) #start to end frequency with calling custombusinessday s = pd.date_range('2019-01-01', '2019-12-31', freq=cal) #date format sumc = s.strftime('%Y-%m-%d') #extra date (saturday and sunday ) s1 = ['2019-12-28'] #Calling DataFrame constructor on s df = pd.DataFrame(sumc, columns=['umc']) #Calling DataFrame constructor on s1 df1 = pd.DataFrame(s1, columns=['umc']) #concatenating df and df1 on Dataframe data = pd.concat([df, df1], ignore_index=True) # converting to list umcdata = {'umc-workingday': data["umc"].tolist()}
'Mayor William J. Gaynor Funeral 12pm late open Sept 22, 1913', month=9, day=22, start_date=Timestamp('1913-09-22'), end_date=Timestamp('1913-09-22'), ) # 1914 # Reopened for trading bonds (with restrictions) Nov 27, 1914 # Reopened for trading stocks (with restrictions) Dec 12, 1914 # Restrictions remained in place until April 1, 1915 OnsetOfWWI1914 = date_range( '1914-07-31', '1914-12-11', freq=CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat'), tz='UTC') # 1917 DraftRegistrationDay1917 = [Timestamp('1917-06-05', tz='UTC')] WeatherHeatClosing1917 = [Timestamp('1917-08-04', tz='UTC')] ParadeOfNationalGuardEarlyClose1917 = Holiday( 'Parade of National Guard 12pm Early Close Aug 29, 1917', month=8, day=29, start_date=Timestamp('1917-08-29'), end_date=Timestamp('1917-08-29'), )
def day(self):
    """
    Return the business-day offset for the days our exchange is open.

    The week mask lists all seven weekdays, so no day of the week is
    excluded by the offset.
    """
    return CustomBusinessDay(weekmask="Mon Tue Wed Thu Fri Sat Sun")
# Set the title # ax1.set_title("AAPL Adjusted opening price") # Other customizations # ax1.set_ylim(0, 200) # ax1.set_yticks([0, 50, 100, 150, 200]) # Other plots # ax2.plot(aapl[["Adj. Low", "Adj. High"]]) # ax3.plot(aapl[["Low", "High"]]) # Data Cleaning bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar()) def neigbor_dates(date, nbefore=3, nafter=3): # Make sure the date is a datetime date = pd.to_datetime(date) # Create a list of business days before_and_after = [date + i * bday_us for i in range(-nbefore, nafter + 1)] return before_and_after dates = [] for ann_date in announcement_dates.index:
def __init__(self):
    """
    Precompute the business-day offset and the close/open time delta.

    Reads ``calendar``, ``weekmask``, ``close_time`` and ``open_time`` from
    the instance and stores:

    * ``cbd``   -- a ``CustomBusinessDay`` built from the calendar and
      week mask;
    * ``delta`` -- the result of ``timedelta_between(close_time, open_time)``.
    """
    offset_options = {
        "calendar": self.calendar,
        "weekmask": self.weekmask,
    }
    self.cbd = CustomBusinessDay(**offset_options)

    closing, opening = self.close_time, self.open_time
    self.delta = timedelta_between(closing, opening)