Example #1
 def setUp(self):
     self.bday = CustomBusinessDay(calendar=USFederalHolidayCalendar())
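 # A minimal usage sketch (the test name and datetime import are assumed, not
 # from the original suite): adding the offset rolls over weekends and US
 # federal holidays.
 def test_next_business_day(self):
     from datetime import datetime
     # Thursday 2020-12-31 + 1 business day skips New Year's Day and the weekend
     assert datetime(2020, 12, 31) + self.bday == datetime(2021, 1, 4)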
Example #2
                day=1,
                offset=[Easter(), Day(-47)]),
        # Good Friday (GoodFriday)
        GoodFriday,
        Holiday('Tiradentes', month=4, day=21),
        Holiday('Dia do Trabalho', month=5, day=1),
        # Corpus Christi (60 days after Easter, or 62 days after Good Friday)
        Holiday('Corpus Christi', month=1, day=1, offset=[Easter(),
                                                          Day(60)]),
        Holiday('Independência do Brasil', month=9, day=7),
        Holiday('Nossa Senhora Aparecida - Padroeira do Brasil',
                month=10,
                day=12),
        Holiday('Finados', month=11, day=2),
        Holiday('Proclamação da República', month=11, day=15),
        Holiday('Natal', month=12, day=25)
    ]


################# Examples #########################

br_feriados = CustomBusinessDay(calendar=Feriados_Brasil())
inst = Feriados_Brasil()

# Generate the list of holidays from 2001 through the end of 2079 (the same period made available by Anbima)
lista_feriados = inst.holidays(dt.datetime(2000, 12, 31),
                               dt.datetime(2079, 12, 31))

# If a date is not a business day, e.g. 2010-01-01, determine the next business day
próximo_dia_util = date(2010, 1, 1) + br_feriados
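# A quick sanity check of the example above (a sketch): 2010-01-01 falls on a
# Friday, so the next business day is Monday 2010-01-04.
assert próximo_dia_util == dt.datetime(2010, 1, 4)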
Example #3
def create_data():
    """ create the pickle/msgpack data """

    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5,
                                                     2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
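# A minimal round-trip sketch (not part of the original file) of what the
# generated offsets are used for: they must unpickle equal to the original.
import pickle
cbd = CustomBusinessDay(weekmask='Mon Fri')
assert pickle.loads(pickle.dumps(cbd)) == cbd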
Example #4
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
import numpy as np
from matplotlib import pyplot as pt

df = pd.read_csv(
    r"C:\Users\Ritish Adhikari\Desktop\CSV File\aapl_no_dates.csv")
rows, columns = df.shape
#print("Printing the original DF Dataframe")
#print(df)
print()
print(
    "------------Putting a date range to the DF Dataframe with US holiday list and printing the dataframe with date time index---------"
)
USB = CustomBusinessDay(calendar=USFederalHolidayCalendar())
dt = pd.date_range(start='2018-07-01', periods=rows, freq=USB)
df.set_index(dt, inplace=True)
print(df)

print()
print(
    "----------------------Printing Indian Calendar Now-------------------------------"
)

from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday


class MyBirthdayCalendar(AbstractHolidayCalendar):

    rules = [
Example #5
from strategy import Strategy
from backtest import Bt
from statistic import indicator, index_accumulate_return

import pandas as pd
from typing import Callable
import matplotlib.pyplot as plt
from pandas.tseries.offsets import BDay
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

from data import Data

US_BUSINESS_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar())


class Portfolio:
    def __init__(self, start_date, end_date):
        self.data = Data().data
        self.start_date = start_date
        self.end_date = end_date

    def run(self, hold_days: int, strategy: Callable):

        assert isinstance(hold_days,
                          int), 'the type of hold_days should be int.'

        signal_dict = self._strategy_ticker_signal(strategy)  # {ticker: }

        ret_output = pd.Series()
        log_df = pd.DataFrame()
Example #6
        Holiday("Independence Day",
                month=7,
                day=15,
                observance=nearest_workday),
        Holiday("Veterans Day", month=8, day=14, observance=nearest_workday),
        Holiday("New Years Day", month=8, day=15, observance=nearest_workday),
        Holiday("New Years Day", month=8, day=16, observance=nearest_workday),
        Holiday("体育の日", month=10, day=14, observance=nearest_workday),
        Holiday("New Years Day", month=12, day=30, observance=nearest_workday),
        Holiday("Black Friday", month=12, day=31, observance=nearest_workday),
    ]


# umc-workingday #
# Using CustomBusinessDay with the custom calendar
cal = CustomBusinessDay(calendar=myCalendar())
# Business-day range from start to end, using the custom offset as the frequency
s = pd.date_range('2019-01-01', '2019-12-31', freq=cal)
# Format the dates as strings
sumc = s.strftime('%Y-%m-%d')
# Extra date to include (2019-12-28 is a Saturday)
s1 = ['2019-12-28']
# Build a DataFrame from s
df = pd.DataFrame(sumc, columns=['umc'])
# Build a DataFrame from s1
df1 = pd.DataFrame(s1, columns=['umc'])
# Concatenate df and df1 into one DataFrame
data = pd.concat([df, df1], ignore_index=True)
# Convert to a dict with the working days as a list
umcdata = {'umc-workingday': data["umc"].tolist()}
Example #7
import MySQLdb as mariadb
from pymongo import MongoClient
from sqlalchemy import create_engine, Table, Column, MetaData
from sqlalchemy_utils import database_exists, create_database
from contextlib import contextmanager

from data import *

# Duplicated from utils
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.offsets import CustomBusinessMonthBegin

US_Federal_Calendar = USFederalHolidayCalendar()
bmth_us = CustomBusinessMonthBegin(calendar=US_Federal_Calendar)
bday_us = CustomBusinessDay(calendar=US_Federal_Calendar)

logging.basicConfig()
logging.getLogger().setLevel(logging.WARN)

Dtype_Mapping = {
    'object': 'TEXT',
    'int64': 'INT',
    'float64': 'FLOAT',
    'datetime64': 'DATETIME',
    'bool': 'TINYINT',
    'category': 'TEXT',
    'timedelta[ns]': 'TEXT'
}
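# A brief usage sketch (dates assumed, not from the original module) for the
# two offsets defined above:
import pandas as pd
pd.date_range('2021-01-01', periods=3, freq=bmth_us)
# -> DatetimeIndex(['2021-01-04', '2021-02-01', '2021-03-01'], ...)  first business day of each month
pd.Timestamp('2021-07-02') + bday_us
# -> Timestamp('2021-07-06')  skips the weekend and the observed July 4th holiday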

Example #8
#HOLIDAYS
import pandas as pd 
import numpy as np
from datetime import datetime
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday
from pandas.tseries.offsets import CustomBusinessDay

df = pd.read_csv('appl_no_dates.csv')
print(df)

rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq='B')
print(rng)

us_cal = CustomBusinessDay(calendar=USFederalHolidayCalendar())

rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq=us_cal)
print(rng)

df.set_index(rng, inplace=True)
print(df)

class myCalendar(AbstractHolidayCalendar):
    rules = [
        Holiday('My Birth Day', month=4, day=12)    #, observance=nearest_workday),
    ]
    
my_bday = CustomBusinessDay(calendar=myCalendar())
print(pd.date_range('4/1/2017','4/30/2017',freq=my_bday))
print(pd.date_range(start='4/1/2017', end='4/30/2017',freq=my_bday))
plt.legend()
plt.show()
'''
# Creating a DataFrame containing the timeseries data along with the pca features
data.iloc[:, 1:3] = X[:, 0:2]
ts = data.iloc[:, 0:3]
ts.columns = ['InvoiceDate', 'PCA1', 'PCA2']
ts.index = ts.InvoiceDate
ts = ts.iloc[:, 1:3]
#ts1 = ts.drop_duplicates()

UBO = ts

from pandas.tseries.offsets import CustomBusinessDay
week_mask = 'Mon'
bo1 = CustomBusinessDay(weekmask=week_mask)
BOM = ts.asfreq(freq=bo1, method='ffill')
# Applying PCA
from sklearn.decomposition import PCA
pca1 = PCA(
    n_components=1
)  # first run with the n_components=None , then after seeing the explained variance, choose the number
BOM = pca1.fit_transform(BOM)
explained_variance = pca1.explained_variance_ratio_
plt.plot(BOM, label='Monday')
plt.title('Biased Observer - Monday 2')
plt.legend('M')
plt.figure('1')

from pandas.tseries.offsets import CustomBusinessDay
week_mask = 'Tue'
def get_holidays(dt_start, dt_end):
    br_holidays = CustomBusinessDay(calendar=BrazilHolidays())
    inst = BrazilHolidays()
    return inst.holidays(dt_start, dt_end)
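# A usage sketch (BrazilHolidays is the calendar class assumed to be defined
# elsewhere in this module):
import datetime as dt
feriados_2020 = get_holidays(dt.datetime(2020, 1, 1), dt.datetime(2020, 12, 31))
# feriados_2020 is a DatetimeIndex with the Brazilian holidays that fall in 2020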
Example #11
 def __setstate__(self, state):
     self.business_days = state.pop("business_days")
     CustomBusinessDay.__setstate__(self, state)
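 # A sketch of the complementary __getstate__ such a subclass typically defines
 # (assumed; it is not shown in the original snippet): the extra attribute is
 # stored next to the base offset's state so __setstate__ above can pop it out.
 def __getstate__(self):
     state = CustomBusinessDay.__getstate__(self)
     state["business_days"] = self.business_days
     return state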
Example #12
 def day(self):
     return CustomBusinessDay(holidays=self.adhoc_holidays,
                              calendar=self.regular_holidays, weekmask="Mon Tue Wed Thu Fri")
Example #13
 def __init__(self):
     self.cbd = CustomBusinessDay(calendar=self.calendar,
                                  weekmask=self.weekmask)
     self.delta = timedelta_between(self.close_time, self.open_time)
Example #14
#%%
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from datetime import date
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
import pytz
import os 
cwd = os.getcwd()
print(cwd)
path = 'Senior-Thesis/'
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar(), normalize=True)

def isWorkingDay(x):
    d = us_bd.rollback(date(x.year, x.month, x.day))
    return d.day == x.day and d.month == x.month and d.year == x.year
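# Quick checks of the helper above (dates assumed, not from the thesis data):
assert isWorkingDay(date(2024, 7, 3))        # an ordinary Wednesday
assert not isWorkingDay(date(2024, 7, 4))    # Independence Day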

#%%
with open(path+'data/tusonload.json') as f:
    datafile = json.load(f)
ldf = pd.DataFrame(datafile['series'][0]['data'], columns=['date', 'load'])
ldf.date = pd.to_datetime(ldf.date)


#%%
a = ldf['load'].isnull()
b = a.cumsum()
c = ldf['load'].bfill()
d = c + (b-b.mask(a).bfill().fillna(0).astype(int)).sub(1)
Example #15
        # New Year's Day has a different observance method because the preceding
        # Friday is year end and the exchange is open
        Holiday('New Years Day', month=1, day=1, observance=sunday_to_monday),
        USMartinLutherKingJr,
        USPresidentsDay,
        USMemorialDay,
        Holiday('July 4th', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        GoodFriday,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday)
    ]


# US business day
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
# US financial business day
fi_bd = CustomBusinessDay(calendar=USFinancialHolidayCalendar())
fi_holidays = USFinancialHolidayCalendar().holidays
_ = fi_holidays('19500101',
                '20500101')  # Pre-compute the holidays once for efficiency

# US exchange business hours
fi_bh = CustomBusinessHour(calendar=USFederalHolidayCalendar(),
                           start='9:30',
                           end='16:30')


def weekday_distance(t1, t2, convention='forward'):
    """ Number of weekdays between t1 and t2: t2 - t1
    Rolling forward convention: from 0 am+ to 0 am+
Example #16
import numpy as np
import pandas as pd
import patsy

from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import scipy
import pytz
import re

utils.setup_logging()
_log = logging.getLogger(__name__)
UTC_TZ = pytz.timezone('UTC')
WORKING_DIR = os.getcwd()
__version__ = 0.1
HOLIDAYS = pd.to_datetime(CustomBusinessDay(calendar=calendar()).holidays)


def is_weekend_holiday(start, end, tz):
    if start.astimezone(tz).date() in HOLIDAYS and \
            end.astimezone(tz).date() in HOLIDAYS:
        return True
    if start.astimezone(tz).weekday() > 4 and \
            end.astimezone(tz).weekday() > 4:
        return True
    return False
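# A minimal check of the weekend branch (timestamps assumed): both endpoints
# fall on a Saturday in US/Eastern, so the function returns True.
_sat = pd.Timestamp('2021-06-05 12:00', tz='UTC')
assert is_weekend_holiday(_sat, _sat + pd.Timedelta(hours=2), pytz.timezone('US/Eastern'))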


def sort_list(lst):
    sorted_list = []
    for item in lst:
Example #17
def generate_and_update_futures_data_file_4tickerhead(**kwargs):

    ticker_head = kwargs['ticker_head']

    con = msu.get_my_sql_connection(**kwargs)

    if os.path.isfile(presaved_futures_data_folder + '/' + ticker_head +
                      '.pkl'):
        old_data = pd.read_pickle(presaved_futures_data_folder + '/' +
                                  ticker_head + '.pkl')
        last_available_date = int(
            old_data['settle_date'].max().to_pydatetime().strftime('%Y%m%d'))
        date_from = cu.doubledate_shift(last_available_date, 60)
        data4_tickerhead = gfp.get_futures_price_4ticker(
            ticker_head=ticker_head, date_from=date_from, con=con)
    else:
        data4_tickerhead = gfp.get_futures_price_4ticker(
            ticker_head=ticker_head, con=con)

    data4_tickerhead = pd.merge(data4_tickerhead,
                                dirty_data_points,
                                on=['settle_date', 'ticker'],
                                how='left')
    data4_tickerhead = data4_tickerhead[data4_tickerhead['discard'] != True]
    data4_tickerhead = data4_tickerhead.drop('discard', axis=1)

    data4_tickerhead['close_price'] = [
        float(x) if x is not None else float('NaN')
        for x in data4_tickerhead['close_price'].values
    ]
    data4_tickerhead['open_price'] = [
        float(x) if x is not None else float('NaN')
        for x in data4_tickerhead['open_price'].values
    ]
    data4_tickerhead['high_price'] = [
        float(x) if x is not None else float('NaN')
        for x in data4_tickerhead['high_price'].values
    ]
    data4_tickerhead['low_price'] = [
        float(x) if x is not None else float('NaN')
        for x in data4_tickerhead['low_price'].values
    ]

    data4_tickerhead['cont_indx'] = 100 * data4_tickerhead[
        'ticker_year'] + data4_tickerhead['ticker_month']
    unique_cont_indx_list = data4_tickerhead['cont_indx'].unique()
    num_contracts = len(unique_cont_indx_list)
    unique_cont_indx_list = np.sort(unique_cont_indx_list)
    merged_dataframe_list = [None] * num_contracts

    bday_us = CustomBusinessDay(calendar=exp.get_calendar_4ticker_head('CL'))
    full_dates = pd.date_range(start=data4_tickerhead['settle_date'].min(),
                               end=data4_tickerhead['settle_date'].max(),
                               freq=bday_us)

    for i in range(num_contracts):

        contract_data = data4_tickerhead[data4_tickerhead['cont_indx'] ==
                                         unique_cont_indx_list[i]]

        contract_full_dates = full_dates[
            (full_dates >= contract_data['settle_date'].min())
            & (full_dates <= contract_data['settle_date'].max())]
        full_date_frame = pd.DataFrame(contract_full_dates,
                                       columns=['settle_date'])
        merged_dataframe_list[i] = pd.merge(full_date_frame,
                                            contract_data,
                                            on='settle_date',
                                            how='left')

        merged_dataframe_list[i]['ticker'] = contract_data['ticker'][
            contract_data.index[0]]
        merged_dataframe_list[i]['ticker_head'] = contract_data['ticker_head'][
            contract_data.index[0]]
        merged_dataframe_list[i]['ticker_month'] = contract_data[
            'ticker_month'][contract_data.index[0]]
        merged_dataframe_list[i]['ticker_year'] = contract_data['ticker_year'][
            contract_data.index[0]]
        merged_dataframe_list[i]['cont_indx'] = contract_data['cont_indx'][
            contract_data.index[0]]

        merged_dataframe_list[i][
            'change1'] = merged_dataframe_list[i]['close_price'].shift(
                -2) - merged_dataframe_list[i]['close_price'].shift(-1)
        merged_dataframe_list[i]['change1_instant'] = merged_dataframe_list[i][
            'close_price'].shift(-1) - merged_dataframe_list[i]['close_price']
        merged_dataframe_list[i]['high1_instant'] = merged_dataframe_list[i][
            'high_price'].shift(-1)
        merged_dataframe_list[i]['low1_instant'] = merged_dataframe_list[i][
            'low_price'].shift(-1)
        merged_dataframe_list[i][
            'change2'] = merged_dataframe_list[i]['close_price'].shift(
                -3) - merged_dataframe_list[i]['close_price'].shift(-1)
        merged_dataframe_list[i][
            'change5'] = merged_dataframe_list[i]['close_price'].shift(
                -6) - merged_dataframe_list[i]['close_price'].shift(-1)
        merged_dataframe_list[i][
            'change10'] = merged_dataframe_list[i]['close_price'].shift(
                -11) - merged_dataframe_list[i]['close_price'].shift(-1)
        merged_dataframe_list[i][
            'change20'] = merged_dataframe_list[i]['close_price'].shift(
                -21) - merged_dataframe_list[i]['close_price'].shift(-1)
        merged_dataframe_list[i]['change_5'] = merged_dataframe_list[i][
            'close_price'] - merged_dataframe_list[i]['close_price'].shift(5)
        merged_dataframe_list[i]['change_1'] = merged_dataframe_list[i][
            'close_price'] - merged_dataframe_list[i]['close_price'].shift(1)

    data4_tickerhead = pd.concat(merged_dataframe_list)

    if os.path.isfile(presaved_futures_data_folder + '/' + ticker_head +
                      '.pkl'):
        data4_tickerhead['past_indx'] = [
            1 if np.isfinite(x) else 0
            for x in data4_tickerhead['change_5'].values
        ]
        clean_data = data4_tickerhead
        clean_data['frame_indx'] = 1

        data_columns = old_data.columns
        old_data['frame_indx'] = 0
        old_data['past_indx'] = [
            1 if np.isfinite(x) else 0 for x in old_data['change_5'].values
        ]
        merged_data = pd.concat([old_data, clean_data],
                                ignore_index=True,
                                sort=True)
        merged_data.sort_values(
            ['cont_indx', 'settle_date', 'past_indx', 'frame_indx'],
            ascending=[True, True, False, False],
            inplace=True)
        merged_data.drop_duplicates(subset=['settle_date', 'cont_indx'],
                                    keep='first',
                                    inplace=True)
        data4_tickerhead = merged_data.drop(['frame_indx', 'past_indx'],
                                            axis=1,
                                            inplace=False)
        data4_tickerhead = data4_tickerhead[data_columns]

    data4_tickerhead.to_pickle(presaved_futures_data_folder + '/' +
                               ticker_head + '.pkl')

    if 'con' not in kwargs.keys():
        con.close()
Example #18
    def _processOption1Input(self):
        '''
        Validates input for option 1.
        '''
        #print("_procOp1") # debug
        self._symbols = self._symbolInput1.get().upper()
        self._startDate = self._startDateEntry.get()
        self._endDate = self._endDateEntry.get()

        endDateCheck = tuple(self._endDate.split('-'))
        startDateCheck = tuple(self._startDate.split('-'))

        if (self._symbols and self._startDate and self._endDate):
            # Convert symbols to tuple
            self._symbolList = [
                symbol.strip() for symbol in self._symbols.split(',')
            ]
            if "" in self._symbolList: self._symbolList.remove("")
            #print(self._symbolList)
            # No more than 4 symbols.
            limit = 4
            if len(self._symbolList) <= limit:
                for symbol in self._symbolList:
                    if not symbol.isalpha():
                        raise ValueError(
                            "Symbols must be alphabetical characters.")
            else:
                raise ValueError("There may be no more than " + str(limit) +
                                 " symbols.")
                # Must all be alphabetical chars.

            # date checks
            # 1. Correct format
            # 2. A valid calendar date
            # 3. The end date is not before the start date, and not in the future
            if len(endDateCheck) == 3 and len(
                    startDateCheck) == 3:  # Correct format?
                # Are they all numbers? Check by converting to int
                #print(endDateCheck, startDateCheck)
                endDateList = [int(num) for num in endDateCheck]
                startDateList = [int(num) for num in startDateCheck]
                present = datetime.datetime.now()
                # Now check to see that the end date is later than the start date
                if not (datetime.datetime(*endDateList) <=
                        datetime.datetime.now()):
                    raise ValueError(
                        "The end date needs to either occur today or before today."
                    )
                # and the endDate is no further than the current day.
                elif not (datetime.datetime(*endDateList) >=
                          datetime.datetime(*startDateList)):
                    raise ValueError(
                        "The end date needs to occur after the start date.")
                else:
                    # So if those two errors aren't raised, modify the start date and end date
                    # to ensure that both are business days and not weekends nor holidays.
                    # If they are, then roll them to the nearest business day.
                    self._startDate = str(
                        np.busday_offset(self._startDate, 0, roll='forward'))
                    self._endDate = str(
                        np.busday_offset(self._endDate, 0, roll='backward'))
                    us_bd = CustomBusinessDay(
                        calendar=USFederalHolidayCalendar())
                    # also create a list of the dates in between the start and the end date to use in DataRetriever.
                    self._wantedDates = list(
                        pd.date_range(start=self._startDate,
                                      end=self._endDate,
                                      freq=us_bd).strftime('%Y-%m-%d'))
                    self._startDate = self._wantedDates[0]
                    self._endDate = self._wantedDates[-1]

            else:
                raise ValueError('Invalid date format!')
        else:
            #pass
            raise ValueError(
                "All three fields need to be filled before a graph " +
                "can be created.")
Example #19
 def day(self):
     """
     The days on which our exchange will be open.
     """
     weekmask = "Mon Tue Wed Thu Fri Sat Sun"
     return CustomBusinessDay(weekmask=weekmask)
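# With the seven-day weekmask above, the offset no longer skips any day of the
# week. A standalone sketch (dates assumed):
import pandas as pd
from pandas.tseries.offsets import CustomBusinessDay
pd.date_range('2021-01-01', periods=4, freq=CustomBusinessDay(weekmask="Mon Tue Wed Thu Fri Sat Sun"))
# -> DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04'], dtype='datetime64[ns]', freq='C')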
Example #20

pd.date_range(start="7/1/2017", end="7/21/2017", freq='B')


# In[4]:


from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay


# In[5]:


usb = CustomBusinessDay(calendar=USFederalHolidayCalendar())


# In[6]:


rng = pd.date_range(start="7/1/2017", end="7/21/2017", freq=usb)


# In[7]:


df.set_index(rng, inplace=True)
df

def create_data():
    """ create the pickle data """

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
Example #22
class FrenchBusinessCalendar(AbstractHolidayCalendar):
    rules = [
        Holiday('New Years Day', month=1, day=1), EasterMonday,
        Holiday('Labour Day', month=5, day=1),
        Holiday('Victory in Europe Day', month=5, day=8),
        Holiday('Ascension Day', month=1, day=1, offset=[Easter(),
                                                         Day(39)]),
        Holiday('Bastille Day', month=7, day=14),
        Holiday('Assumption of Mary to Heaven', month=8, day=15),
        Holiday('All Saints Day', month=11, day=1),
        Holiday('Armistice Day', month=11, day=11),
        Holiday('Christmas Day', month=12, day=25)
    ]


French_BD = CustomBusinessDay(calendar=FrenchBusinessCalendar())
s = pd.date_range('2016-12-29', end='2021-01-03', freq=French_BD)
df = pd.DataFrame(s, columns=['Date'])
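# A quick check (a sketch) that the calendar above removes French public
# holidays: Labour Day 2017-05-01 (a Monday) is excluded, the next day is kept.
assert pd.Timestamp('2017-05-01') not in s
assert pd.Timestamp('2017-05-02') in s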

# Define fares depending on the time of day
normal_dict = {
    'day_first_hour_fare': '40',
    'night_first_hour_fare': '49.50',
    'day_subsequent_hour_fare': '32',
    'night_subsequent_hour_fare': '37.50'
}

holiday_dict = {
    'day_first_hour_fare': '49.50',
    'night_first_hour_fare': '57',
    'day_subsequent_hour_fare': '37.50',
Example #23
def create_data():
    """create the pickle data"""
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    mi = {
        "reg2":
        MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }

    series = {
        "float":
        Series(data["A"]),
        "int":
        Series(data["B"]),
        "mixed":
        Series(data["E"]),
        "ts":
        Series(np.arange(10).astype(np.int64),
               index=date_range("20130101", periods=10)),
        "mi":
        Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        "dup":
        Series(np.arange(5).astype(np.float64),
               index=["A", "B", "C", "D", "A"]),
        "cat":
        Series(Categorical(["foo", "bar", "baz"])),
        "dt":
        Series(date_range("20130101", periods=5)),
        "dt_tz":
        Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period":
        Series([Period("2000Q1")] * 5),
    }

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float":
        DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int":
        DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed":
        DataFrame({k: data[k]
                   for k in ["A", "B", "C", "D"]}),
        "mi":
        DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup":
        DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                  columns=["A", "B", "A"]),
        "cat_onecol":
        DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float":
        DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup":
        mixed_dup_df,
        "dt_mixed_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
Example #24
 def day(self):
     return CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat Sun')
Example #25
def get_dates(start_date, num_days):
    us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    dates = pd.date_range(start_date, periods=num_days, freq=us_bd)
    return np.array(dates.to_pydatetime(), dtype=np.datetime64)
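# A usage sketch (dates assumed): five business days starting Monday 2024-11-25
# skip Thanksgiving (Thursday 2024-11-28) and the weekend.
dates = get_dates('2024-11-25', 5)
# -> ['2024-11-25', '2024-11-26', '2024-11-27', '2024-11-29', '2024-12-02'] as numpy datetime64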
Example #26
 def trade_date_offset(self):
     """
     Trading-day offset.
     :return:
     """
     return CustomBusinessDay(holidays=self.holiday_calendar.tolist())
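# A behavior sketch (holiday dates assumed, e.g. a National Day trading break):
# the returned offset steps over the listed holidays as well as weekends.
from pandas import Timestamp
from pandas.tseries.offsets import CustomBusinessDay
offset = CustomBusinessDay(holidays=['2024-10-01', '2024-10-02', '2024-10-03',
                                     '2024-10-04', '2024-10-07'])
Timestamp('2024-09-30') + offset   # -> Timestamp('2024-10-08')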
Example #27
 def day(self):
     return CustomBusinessDay(
         holidays=self.adhoc_holidays,
         calendar=self.regular_holidays,
         weekmask=self.weekmask,
     )
Example #28
# With sizes 10 and 75
df10=df2[0:10]
df75=df2[0:75]


# # from pandas.tseries.holiday import USFederalHolidayCalendar
# # India's calendar is not built into pandas, so we create its holidays ourselves.
# # In India, Sunday is the weekly holiday, so the weekmask excludes Sunday (this could be refined).
# # Let's say 2 July is also a holiday (assumed).
# # All Sundays plus the selected holidays are handled this way.

# In[311]:


from pandas.tseries.offsets import CustomBusinessDay
hh = CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat', holidays=['2017-07-02'])


# In[312]:


r1 = pd.date_range(start="6/1/2015", end="6/11/2015", freq=hh)
df10.set_index(r1, inplace=True)
df10.head()


# In[313]:


df10.plot()
Example #29
accuracy_svr_lin = clf_svr_lin.score(X_svr_lin_test, y_svr_lin_test)
accuracy_svr_rbf = clf_svr_rbf.score(X_svr_rbf_test, y_svr_rbf_test)
accuracy_lin = clf_lin.score(X_lin_test, y_lin_test)

# Predicting the data
forecast_predicted_svr_lin = clf_svr_lin.predict(X_lately_svr_lin)
forecast_predicted_svr_rbf = clf_svr_rbf.predict(X_lately_svr_rbf)
forecast_predicted_lin = clf_lin.predict(X_lately_lin)

# Creating the column with nan
df_lin['Forecast_lin'] = np.nan
df_svr_lin['Forecast_svr_lin'] = np.nan
df_svr_rbf['Forecast_svr_rbf'] = np.nan

# Getting the business and holiday days
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Last date in each dataset; forecasted values are appended after it
last_date_lin = df_lin.iloc[-1].name
last_date_svr_lin = df_svr_lin.iloc[-1].name
last_date_svr_rbf = df_svr_rbf.iloc[-1].name
one_day = bday_us  # step forward one US business day at a time
next_bday_lin = last_date_lin + one_day
next_bday_svr_lin = last_date_svr_lin + one_day
next_bday_svr_rbf = last_date_svr_rbf + one_day

# Loop to add each predicted price at the correct date
for i in forecast_predicted_svr_lin:
    next_date_svr_lin = next_bday_svr_lin
    next_bday_svr_lin += one_day
    df_svr_lin.loc[next_date_svr_lin] = [np.nan for _ in range(len(df_svr_lin.columns)-1)] + [i]
Example #30
    def test_get_clean_factor_and_forward_returns_6(self):
        """
        Test get_clean_factor_and_forward_returns with a daily factor
        on a business day calendar and holidays
        """
        tickers = ['A', 'B', 'C', 'D', 'E', 'F']

        factor_groups = {'A': 1, 'B': 2, 'C': 1, 'D': 2, 'E': 1, 'F': 2}

        price_data = [[1.10**i, 0.50**i, 3.00**i, 0.90**i, 0.50**i, 1.00**i]
                      for i in range(1, 22)]  # 21 days = 18 + 3 fwd returns

        factor_data = [[3, 4, 2, 1, nan, nan], [3, nan, nan, 1, 4, 2],
                       [3, 4, 2, 1, nan, nan]] * 6  # 18 days

        start = '2017-1-12'
        factor_end = '2017-2-10'
        price_end = '2017-2-15'  # 3D (business day) fwd returns
        holidays = ['2017-1-13', '2017-1-18', '2017-1-30', '2017-2-7']
        holidays = [Timestamp(d) for d in holidays]
        # First, define the freq object
        freq = CustomBusinessDay(normalize=True, holidays=holidays)

        price_index = date_range(start=start, end=price_end, freq=freq)
        price_index.name = 'date'
        # Using the `drop` method here would lose the freq
        # price_index = price_index.drop(holidays)

        prices = DataFrame(index=price_index, columns=tickers, data=price_data)

        factor_index = date_range(start=start, end=factor_end, freq=freq)
        factor_index.name = 'date'
        # factor_index = factor_index.drop(holidays)
        factor = DataFrame(index=factor_index,
                           columns=tickers,
                           data=factor_data).stack()

        factor_data = get_clean_factor_and_forward_returns(
            factor,
            prices,
            groupby=factor_groups,
            quantiles=4,
            periods=(1, 2, 3))

        expected_idx = factor.index.rename(['date', 'asset'])
        expected_cols = [
            '1D', '2D', '3D', 'factor', 'group', 'factor_quantile'
        ]
        expected_data = [
            [0.1, 0.21, 0.331, 3.0, 1, 3], [-0.5, -0.75, -0.875, 4.0, 2, 4],
            [2.0, 8.00, 26.000, 2.0, 1, 2], [-0.1, -0.19, -0.271, 1.0, 2, 1],
            [0.1, 0.21, 0.331, 3.0, 1, 3], [-0.1, -0.19, -0.271, 1.0, 2, 1],
            [-0.5, -0.75, -0.875, 4.0, 1, 4], [0.0, 0.00, 0.000, 2.0, 2, 2],
            [0.1, 0.21, 0.331, 3.0, 1, 3], [-0.5, -0.75, -0.875, 4.0, 2, 4],
            [2.0, 8.00, 26.000, 2.0, 1, 2], [-0.1, -0.19, -0.271, 1.0, 2, 1]
        ] * 6  # 18 days
        expected = DataFrame(index=expected_idx,
                             columns=expected_cols,
                             data=expected_data)
        expected['group'] = expected['group'].astype('category')

        assert_frame_equal(factor_data, expected)

        # inferred_holidays = factor_data.index.levels[0].freq.holidays
        # assert sorted(holidays) == sorted(inferred_holidays)
        assert not any(factor_data.index.levels[0].isin(holidays))
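# A standalone sketch of the mechanism the test exercises (dates assumed):
# a CustomBusinessDay built with explicit holidays drops them from date_range.
from pandas import Timestamp, date_range
from pandas.tseries.offsets import CustomBusinessDay
freq = CustomBusinessDay(normalize=True, holidays=[Timestamp('2017-01-13')])
date_range('2017-01-12', '2017-01-17', freq=freq)
# -> DatetimeIndex(['2017-01-12', '2017-01-16', '2017-01-17'], dtype='datetime64[ns]', freq='C')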