Example #1
0
 def day(self):
     """Return the business-day offset for this calendar.

     The offset counts Monday through Friday as working days and skips
     the dates supplied by ``self.adhoc_holidays`` as well as those of
     the ``self.regular_holidays`` calendar.
     """
     adhoc = self.adhoc_holidays
     regular = self.regular_holidays
     workweek = "Mon Tue Wed Thu Fri"
     return CustomBusinessDay(weekmask=workweek,
                              holidays=adhoc,
                              calendar=regular)
Example #2
0
def _get_custom_bd(exchange):
    """Build a ``CustomBusinessDay`` offset for ``exchange``.

    The offset is backed by the business-day calendar that
    ``GsCalendar.get(exchange)`` exposes through
    ``business_day_calendar()``.
    """
    # pandas is imported at call time, not at module import time.
    from pandas.tseries.offsets import CustomBusinessDay

    exchange_calendar = GsCalendar.get(exchange)
    busday_calendar = exchange_calendar.business_day_calendar()
    return CustomBusinessDay(calendar=busday_calendar)
 def day(self):
     """Return the business-day offset described by this calendar.

     Working days come from ``self.weekmask``; the dates in
     ``self.adhoc_holidays`` and the ``self.regular_holidays`` calendar
     are skipped.
     """
     adhoc = self.adhoc_holidays
     regular = self.regular_holidays
     mask = self.weekmask
     return CustomBusinessDay(holidays=adhoc, calendar=regular, weekmask=mask)
Example #4
0
                day=1,
                offset=[Easter(), Day(-47)]),
        # Sexta feira Santa(GoodFriday)
        GoodFriday,
        Holiday('Tiradentes', month=4, day=21),
        Holiday('Dia do Trabalho', month=5, day=1),
        # Corpus Christi (60 dias após a Páscoa, ou 62 dias após a Sexta Feira Santa)
        Holiday('Corpus Christi', month=1, day=1, offset=[Easter(),
                                                          Day(60)]),
        Holiday('Independência do Brasil', month=9, day=7),
        Holiday('Nossa Senhora Aparecida - Padroeira do Brasil',
                month=10,
                day=12),
        Holiday('Finados', month=11, day=2),
        Holiday('Proclamação da República', month=11, day=15),
        Holiday('Natal', month=12, day=25)
    ]


################# Examples #########################

# Business-day offset that skips the holidays defined by Feriados_Brasil.
br_feriados = CustomBusinessDay(calendar=Feriados_Brasil())
# Separate calendar instance used to list the holiday dates themselves.
inst = Feriados_Brasil()

# Generate the list of holidays from 2001 until the end of 2079
# (the same period made available by Anbima).
lista_feriados = inst.holidays(dt.datetime(2000, 12, 31),
                               dt.datetime(2079, 12, 31))

# If a date is not a business day, for example 01/01/2010, determine the
# next business day by adding the offset to it.
# NOTE(review): `date` must be in scope here (the call above uses
# `dt.datetime`) -- confirm the import.
pŕoximo_dia_util = date(2010, 1, 1) + br_feriados
Example #5
0
 def bday(self, d):
     """Return ``d`` multiplied by a business-day offset bound to this calendar.

     ``self`` is passed to ``CustomBusinessDay`` as its ``calendar``.
     """
     one_business_day = CustomBusinessDay(calendar=self)
     return d * one_business_day
Example #6
0
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
import numpy as np
from matplotlib import pyplot as pt

# Load the CSV from a hard-coded absolute Windows path.
df = pd.read_csv(
    r"C:\Users\Ritish Adhikari\Desktop\CSV File\aapl_no_dates.csv")
# Only `rows` is used below, as the number of periods to generate.
rows, columns = df.shape
#print("Printing the original DF Dataframe")
#print(df)
print()
print(
    "------------Putting a date range to the DF Dataframe with US holiday list and printing the dataframe with date time index---------"
)
# Business-day offset that skips the US federal holiday calendar.
USB = CustomBusinessDay(calendar=USFederalHolidayCalendar())
# One business day per row of the frame, starting at 2018-07-01.
dt = pd.date_range(start='2018-07-01', periods=rows, freq=USB)
# Replace the frame's index with the generated dates, in place.
df.set_index(dt, inplace=True)
print(df)

print()
print(
    "----------------------Printing Indian Calendar Now-------------------------------"
)

from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday


class MyBirthdayCalendar(AbstractHolidayCalendar):

    rules = [
Example #7
0
from strategy import Strategy
from backtest import Bt
from statistic import indicator, index_accumulate_return

import pandas as pd
from typing import Callable
import matplotlib.pyplot as plt
from pandas.tseries.offsets import BDay
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

from data import Data

# Business-day offset that skips the dates of pandas' USFederalHolidayCalendar.
US_BUSINESS_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar())


class Portfolio:
    def __init__(self, start_date, end_date):
        """Store the date bounds and load the data set.

        ``self.data`` is taken from the ``data`` attribute of a freshly
        constructed ``Data`` object; ``start_date`` and ``end_date`` are
        stored unchanged.
        """
        loader = Data()
        self.data = loader.data
        self.start_date, self.end_date = start_date, end_date

    def run(self, hold_days: int, strategy: Callable):

        assert isinstance(hold_days,
                          int), 'the type of hold_dates should be int.'

        signal_dict = self._strategy_ticker_signal(strategy)  # {ticker: }

        ret_output = pd.Series()
        log_df = pd.DataFrame()
Example #8
0

# Plain business days ('B': Monday-Friday) between the two dates; the result
# is only displayed, not stored.
pd.date_range(start="7/1/2017", end="7/21/2017", freq='B')


# In[4]:


from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay


# In[5]:


# Business-day offset that skips the US federal holiday calendar.
usb = CustomBusinessDay(calendar=USFederalHolidayCalendar())


# In[6]:


# Same date span as above, now using the holiday-aware offset.
rng = pd.date_range(start="7/1/2017", end="7/21/2017", freq=usb)


# In[7]:


# Use the generated dates as the index of `df` (defined earlier in the
# notebook).
df.set_index(rng, inplace=True)
df

Example #9
0
class FrenchBusinessCalendar(AbstractHolidayCalendar):
    """Holiday calendar with the fixed-date and Easter-based rules below."""

    rules = [
        Holiday('New Years Day', month=1, day=1),
        # Predefined pandas rule.
        EasterMonday,
        Holiday('Labour Day', month=5, day=1),
        Holiday('Victory in Europe Day', month=5, day=8),
        # Anchored on 1 January, rolled to Easter, then shifted by 39 days.
        Holiday(
            'Ascension Day',
            month=1,
            day=1,
            offset=[Easter(), Day(39)],
        ),
        Holiday('Bastille Day', month=7, day=14),
        Holiday('Assumption of Mary to Heaven', month=8, day=15),
        Holiday('All Saints Day', month=11, day=1),
        Holiday('Armistice Day', month=11, day=11),
        Holiday('Christmas Day', month=12, day=25),
    ]


# Business-day offset that skips the FrenchBusinessCalendar holidays.
French_BD = CustomBusinessDay(calendar=FrenchBusinessCalendar())
# Every such business day from 2016-12-29 through 2021-01-03.
s = pd.date_range('2016-12-29', end='2021-01-03', freq=French_BD)
# Single-column frame ('Date') holding those business days.
df = pd.DataFrame(s, columns=['Date'])

# Define fares depending on day time
# NOTE(review): the fare values are stored as strings, not numbers --
# confirm they are converted before being used in arithmetic.
normal_dict = {
    'day_first_hour_fare': '40',
    'night_first_hour_fare': '49.50',
    'day_subsequent_hour_fare': '32',
    'night_subsequent_hour_fare': '37.50'
}

holiday_dict = {
    'day_first_hour_fare': '49.50',
    'night_first_hour_fare': '57',
    'day_subsequent_hour_fare': '37.50',
Example #10
0
import numpy as np
import pandas as pd
import patsy

from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import scipy
import pytz
import re

# Module-level setup: configure logging and create this module's logger.
utils.setup_logging()
_log = logging.getLogger(__name__)
UTC_TZ = pytz.timezone('UTC')
# Working directory at import time.
WORKING_DIR = os.getcwd()
__version__ = 0.1
# Holiday dates exposed by a CustomBusinessDay built on the US federal
# holiday calendar (imported above as `calendar`), converted with
# pd.to_datetime.
HOLIDAYS = pd.to_datetime(CustomBusinessDay(calendar=calendar()).holidays)


def is_weekend_holiday(start, end, tz):
    """Tell whether both ``start`` and ``end`` fall on non-working days.

    Both datetimes are converted to ``tz`` first. The result is True when
    both local dates are in ``HOLIDAYS``, or when both local weekdays are
    greater than 4 (Saturday or Sunday); otherwise it is False.
    """
    local_start = start.astimezone(tz)

    # `end` is converted only after the matching `start` test has passed,
    # exactly like the short-circuiting `and` it replaces.
    if local_start.date() in HOLIDAYS:
        if end.astimezone(tz).date() in HOLIDAYS:
            return True

    if local_start.weekday() > 4:
        return end.astimezone(tz).weekday() > 4
    return False


def sort_list(lst):
    sorted_list = []
    for item in lst:
    def _processOption1Input(self):
        '''
        Validates input for option 1.
        '''
        #print("_procOp1") # debug
        self._symbols = self._symbolInput1.get().upper()
        self._startDate = self._startDateEntry.get()
        self._endDate = self._endDateEntry.get()

        endDateCheck = tuple(self._endDate.split('-'))
        startDateCheck = tuple(self._startDate.split('-'))

        if (self._symbols and self._startDate and self._endDate):
            # Convert symbols to tuple
            self._symbolList = [
                symbol.strip() for symbol in self._symbols.split(',')
            ]
            if "" in self._symbolList: self._symbolList.remove("")
            #print(self._symbolList)
            # No more than 4 symbols.
            limit = 4
            if len(self._symbolList) <= limit:
                for symbol in self._symbolList:
                    if not symbol.isalpha():
                        raise ValueError(
                            "Symbols must be alphabetical characters.")
            else:
                raise ValueError("There may be no more than " + str(limit) +
                                 " symbols.")
                # Must all be alphabetical chars.

            # date checks
            # 1. Correct format
            # 2. An date in the calendar
            # 3. The date ends before the start date
            if len(endDateCheck) is 3 and len(
                    startDateCheck) is 3:  # Correct format?
                # Are they all numbers? Check by converting to int
                #print(endDateCheck, startDateCheck)
                endDateList = [int(num) for num in endDateCheck]
                startDateList = [int(num) for num in startDateCheck]
                present = datetime.datetime.now()
                # Now check to see that the end date is later than the start date
                if not (datetime.datetime(*endDateList) <=
                        datetime.datetime.now()):
                    raise ValueError(
                        "The end date needs to either occur today or before today."
                    )
                # and the endDate is no further than the current day.
                elif not (datetime.datetime(*endDateList) >=
                          datetime.datetime(*startDateList)):
                    raise ValueError(
                        "The end date needs to occur after the start date.")
                else:
                    # So if those two errors aren't raised, modify the start date and end date
                    # to ensure that both are business days and not weekends nor holidays.
                    # If they are, then roll them to the nearest business day.
                    self._startDate = str(
                        np.busday_offset(self._startDate, 0, roll='forward'))
                    self._endDate = str(
                        np.busday_offset(self._endDate, 0, roll='backward'))
                    us_bd = CustomBusinessDay(
                        calendar=USFederalHolidayCalendar())
                    # also create a list of the dates in between the start and the end date to use in DataRetriever.
                    self._wantedDates = list(
                        pd.DatetimeIndex(start=self._startDate,
                                         end=self._endDate,
                                         freq=us_bd).format())
                    self._startDate = self._wantedDates[0]
                    self._endDate = self._wantedDates[-1]

            else:
                raise ValueError('Invalid date format!')
        else:
            #pass
            raise ValueError(
                "All three fields need to be filled before a graph " +
                "can be created.")
plt.legend()
plt.show()
'''
# Creating a DataFrame containing the timeseries data along with the pca features
# Columns 1-2 of `data` are overwritten with the first two columns of X.
data.iloc[:, 1:3] = X[:, 0:2]
ts = data.iloc[:, 0:3]
ts.columns = ['InvoiceDate', 'PCA1', 'PCA2']
# Index by invoice date, then keep only the two PCA columns.
ts.index = ts.InvoiceDate
ts = ts.iloc[:, 1:3]
#ts1 = ts.drop_duplicates()

# UBO is another name for the same frame (no copy is made).
UBO = ts

from pandas.tseries.offsets import CustomBusinessDay
# Offset whose only working day is Monday.
week_mask = 'Mon'
bo1 = CustomBusinessDay(weekmask=week_mask)
# Re-sample the series onto that Monday-only frequency, forward-filling gaps.
BOM = ts.asfreq(freq=bo1, method='ffill')
# Applying PCA
from sklearn.decomposition import PCA
pca1 = PCA(
    n_components=1
)  # first run with the n_components=None , then after seeing the explained variance, choose the number
BOM = pca1.fit_transform(BOM)
explained_variance = pca1.explained_variance_ratio_
plt.plot(BOM, label='Monday')
plt.title('Biased Observer - Monday 2')
# NOTE(review): legend() receives the string 'M' where labels are expected,
# and figure('1') is selected only after the plot calls above -- confirm
# both are intended.
plt.legend('M')
plt.figure('1')

from pandas.tseries.offsets import CustomBusinessDay
week_mask = 'Tue'
Example #13
0
#HOLIDAYS
import pandas as pd 
import numpy as np
from datetime import datetime
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday
from pandas.tseries.offsets import CustomBusinessDay

# Load the sample data (read from the current working directory).
df = pd.read_csv('appl_no_dates.csv')
print(df)

# Plain business days ('B': Monday-Friday) between the two dates.
rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq='B')
print(rng)

# Business-day offset that skips the US federal holiday calendar.
us_cal = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Same span, now generated with the holiday-aware offset.
rng = pd.date_range(start='7/1/2017', end='7/21/2017', freq=us_cal)
print(rng)

# Use the holiday-aware dates as the frame's index, in place.
df.set_index(rng, inplace=True)
print(df)

class myCalendar(AbstractHolidayCalendar):
    """Holiday calendar with a single rule: 12 April of every year."""

    # No observance is applied (nearest_workday is a candidate).
    rules = [Holiday('My Birth Day', day=12, month=4)]
    
# Business-day offset that skips the single holiday defined by myCalendar.
my_bday = CustomBusinessDay(calendar=myCalendar())
# Both calls request the same range: positionally, then with keywords.
print(pd.date_range('4/1/2017','4/30/2017',freq=my_bday))
print(pd.date_range(start='4/1/2017', end='4/30/2017',freq=my_bday))
def get_holidays(dt_start, dt_end):
    """Return the ``BrazilHolidays`` holidays between two dates.

    Args:
        dt_start: first date of the range, passed to ``holidays()``.
        dt_end: last date of the range, passed to ``holidays()``.

    Returns:
        Whatever ``BrazilHolidays().holidays(dt_start, dt_end)`` returns.
    """
    # The original also built a CustomBusinessDay on a second calendar
    # instance and never used it; that dead construction is dropped.
    return BrazilHolidays().holidays(dt_start, dt_end)
Example #15
0
import MySQLdb as mariadb
from pymongo import MongoClient
from sqlalchemy import create_engine, Table, Column, MetaData
from sqlalchemy_utils import database_exists, create_database
from contextlib import contextmanager

from data import *

# Duplicated from utils
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.offsets import CustomBusinessMonthBegin

# One shared US federal holiday calendar backs both offsets below.
US_Federal_Calendar = USFederalHolidayCalendar()
# Custom business-month-begin offset that honours the calendar's holidays.
bmth_us = CustomBusinessMonthBegin(calendar=US_Federal_Calendar)
# Business-day offset that skips the calendar's holidays.
bday_us = CustomBusinessDay(calendar=US_Federal_Calendar)

# Root logger: default handler, WARN level.
logging.basicConfig()
logging.getLogger().setLevel(logging.WARN)

# Maps dtype names to SQL column type names.
# NOTE(review): pandas usually reports 'datetime64[ns]' and
# 'timedelta64[ns]'; confirm the 'datetime64' and 'timedelta[ns]' keys match
# what the lookup code passes in.
Dtype_Mapping = {
    'object': 'TEXT',
    'int64': 'INT',
    'float64': 'FLOAT',
    'datetime64': 'DATETIME',
    'bool': 'TINYINT',
    'category': 'TEXT',
    'timedelta[ns]': 'TEXT'
}

Example #16
0
 def day(self):
     """Return a business-day offset in which all seven weekdays are working days."""
     full_week = 'Mon Tue Wed Thu Fri Sat Sun'
     return CustomBusinessDay(weekmask=full_week)
def create_data():
    """Create the pickle data.

    Builds one sample of many pandas object kinds and returns them in a
    dict keyed by category: ``series``, ``frame``, ``index``, ``scalars``,
    ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp`` and
    ``offsets``. Each category maps a short label to an object.
    """

    # Raw column data shared by several Series/DataFrames below; "E" mixes
    # floats, an int, a Timestamp and a string.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    # One Index per index flavour, ten entries each.
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    # interval_range is imported and used only when `_loose_version` is at
    # least 0.21.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    # Two-level MultiIndex built from parallel label lists.
    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    # Series samples: plain dtypes, datetime-indexed, MultiIndex-indexed,
    # duplicate index labels ("dup"), categorical, datetimes with and
    # without a timezone, and periods.
    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Frame over all of `data`, relabelled so that column "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    # DataFrame samples mirroring the Series cases above, plus frames whose
    # columns carry different timezones.
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    # Categoricals with 7, 1000 and 10000 distinct values.
    # NOTE(review): the key names suggest the sizes were chosen to give
    # int8/int16/int32 category codes -- confirm.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    # Timestamps constructed with the `freq` keyword.
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance per date-offset class.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The sparse samples come from the _create_sp_* helpers, which are
    # defined outside this function.
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
Example #18
0
 def trade_date_offset(self):
     """
     Trading-day offset.

     :return: a ``CustomBusinessDay`` that skips the dates listed in
         ``self.holiday_calendar``
     """
     closed_days = self.holiday_calendar.tolist()
     return CustomBusinessDay(holidays=closed_days)
Example #19
0
def create_data():
    """Create the pickle data.

    Builds one sample of many pandas object kinds and returns them in a
    dict keyed by category: ``series``, ``frame``, ``index``, ``scalars``,
    ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp`` and
    ``offsets``. Each category maps a short label to an object.
    """
    # Raw column data shared by several Series/DataFrames below; "E" mixes
    # floats, an int, a Timestamp and a string.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }

    # One Index per index flavour, ten entries each.
    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    # Two-level MultiIndex built from parallel label lists.
    mi = {
        "reg2":
        MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }

    # Series samples: plain dtypes, datetime-indexed, MultiIndex-indexed,
    # duplicate index labels ("dup"), categorical, datetimes with and
    # without a timezone, and periods.
    series = {
        "float":
        Series(data["A"]),
        "int":
        Series(data["B"]),
        "mixed":
        Series(data["E"]),
        "ts":
        Series(np.arange(10).astype(np.int64),
               index=date_range("20130101", periods=10)),
        "mi":
        Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        "dup":
        Series(np.arange(5).astype(np.float64),
               index=["A", "B", "C", "D", "A"]),
        "cat":
        Series(Categorical(["foo", "bar", "baz"])),
        "dt":
        Series(date_range("20130101", periods=5)),
        "dt_tz":
        Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period":
        Series([Period("2000Q1")] * 5),
    }

    # Frame over all of `data`, relabelled so that column "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    # DataFrame samples mirroring the Series cases above, plus frames whose
    # columns carry different timezones.
    frame = {
        "float":
        DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int":
        DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed":
        DataFrame({k: data[k]
                   for k in ["A", "B", "C", "D"]}),
        "mi":
        DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup":
        DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                  columns=["A", "B", "A"]),
        "cat_onecol":
        DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float":
        DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup":
        mixed_dup_df,
        "dt_mixed_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs":
        DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    # Categoricals with 7, 1000 and 10000 distinct values.
    # NOTE(review): the key names suggest the sizes were chosen to give
    # int8/int16/int32 category codes -- confirm.
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }

    # Timestamps constructed with the `freq` keyword.
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance per date-offset class.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The sparse samples come from the _create_sp_* helpers, which are
    # defined outside this function.
    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
Example #20
0
# Two slices of df2: the first 10 rows and the first 75 rows.
df10=df2[0:10]
df75=df2[0:75]


# pandas ships USFederalHolidayCalendar but no Indian calendar, so the
# holidays are described by hand here:
#   - Sunday is the weekly day off, so it is left out of the week mask;
#   - 2 July is assumed to be an extra holiday.
# All Sundays plus any explicitly listed holidays are handled this way.

# In[311]:


from pandas.tseries.offsets import CustomBusinessDay
# Working days are Monday-Saturday; 2017-07-02 is listed as a holiday.
# NOTE(review): that holiday lies outside the 2015 range generated below,
# so it has no effect on `r1`.
hh=CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat',holidays=['2017-07-02'])


# In[312]:


# Working days from 1 June to 11 June 2015, used as the index of df10.
r1=pd.date_range(start="6/1/2015",end="6/11/2015",freq=hh)
df10.set_index(r1,inplace=True)
df10.head()


# In[313]:


df10.plot()
Example #21
0
def get_dates(start_date, num_days):
    """Return ``num_days`` US business days, beginning at ``start_date``.

    Business days skip weekends and the dates of pandas'
    ``USFederalHolidayCalendar``. The result is a NumPy ``datetime64``
    array built from the Python ``datetime`` objects of the range.
    """
    federal_calendar = USFederalHolidayCalendar()
    business_day = CustomBusinessDay(calendar=federal_calendar)
    index = pd.date_range(start_date, freq=business_day, periods=num_days)
    as_py_datetimes = index.to_pydatetime()
    return np.array(as_py_datetimes, dtype=np.datetime64)
Example #22
0
        # New Years have difference observance method because the preceeding
        # Friday is year end and exchange is open
        Holiday('New Years Day', month=1, day=1, observance=sunday_to_monday),
        USMartinLutherKingJr,
        USPresidentsDay,
        USMemorialDay,
        Holiday('July 4th', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        GoodFriday,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday)
    ]


# US business day
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
# US financial business day
fi_bd = CustomBusinessDay(calendar=USFinancialHolidayCalendar())
# Bound `holidays` method of a separate USFinancialHolidayCalendar instance.
fi_holidays = USFinancialHolidayCalendar().holidays
_ = fi_holidays('19500101',
                '20500101')  # Activate the holiday for better efficiency

# US Exchange business hour
# NOTE(review): this uses the federal calendar, not
# USFinancialHolidayCalendar as fi_bd does -- confirm that is intended.
fi_bh = CustomBusinessHour(calendar=USFederalHolidayCalendar(),
                           start='9:30',
                           end='16:30')


def weekday_distance(t1, t2, convention='forward'):
    """ Number of weekdays between t1 and t2: t2 - t1
    Rolling forward convention: from 0 am+ to 0 am+
Example #23
0
def create_data():
    """Create the pickle/msgpack data.

    Builds one sample of many pandas object kinds and returns them in a
    dict keyed by category: ``series``, ``frame``, ``panel``, ``index``,
    ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``,
    ``timestamp`` and ``offsets``. Each category maps a short label to an
    object.
    """

    # Raw column data shared by several Series/DataFrames below; u'E' mixes
    # floats, an int, a Timestamp and a string.
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10))

    # Two-level MultiIndex built from parallel label lists.
    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
              [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']
              ])),
                                          names=[u'first', u'second']))

    # Series samples: plain dtypes, datetime-indexed, MultiIndex-indexed,
    # duplicate index labels ("dup"), categorical, datetimes with and
    # without a timezone, and periods.
    series = dict(
        float=Series(data[u'A']),
        int=Series(data[u'B']),
        mixed=Series(data[u'E']),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range('20130101', periods=10)),
        mi=Series(np.arange(5).astype(np.float64),
                  index=MultiIndex.from_tuples(tuple(
                      zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                               names=[u'one', u'two'])),
        dup=Series(np.arange(5).astype(np.float64),
                   index=[u'A', u'B', u'C', u'D', u'A']),
        cat=Series(Categorical([u'foo', u'bar', u'baz'])),
        dt=Series(date_range('20130101', periods=5)),
        dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')),
        period=Series([Period('2000Q1')] * 5))

    # Frame over all of `data`, relabelled so that column u'A' appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    # DataFrame samples mirroring the Series cases above, plus frames whose
    # columns carry different timezones.
    frame = dict(
        float=DataFrame({
            u'A': series[u'float'],
            u'B': series[u'float'] + 1
        }),
        int=DataFrame({
            u'A': series[u'int'],
            u'B': series[u'int'] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in [u'A', u'B', u'C', u'D']}),
        mi=DataFrame(
            {
                u'A': np.arange(5).astype(np.float64),
                u'B': np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(tuple(
                zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'],
                      [u'one', u'two', u'one', u'two', u'three']])),
                                         names=[u'first', u'second'])),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=[u'A', u'B', u'A']),
        cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        cat_and_float=DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET')
            },
            index=range(5)),
        dt_mixed2_tzs=DataFrame(
            {
                u'A': Timestamp('20130102', tz='US/Eastern'),
                u'B': Timestamp('20130603', tz='CET'),
                u'C': Timestamp('20130603', tz='UTC')
            },
            index=range(5)))

    # Panels are built inside catch_warnings(record=True), so warnings
    # raised during construction are recorded instead of being shown.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'int']
        })
        # Relabel so that the item name u'ItemA' appears twice.
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = dict(float=Panel({
            u'ItemA': frame[u'float'],
            u'ItemB': frame[u'float'] + 1
        }),
                     dup=Panel(np.arange(30).reshape(3, 5,
                                                     2).astype(np.float64),
                               items=[u'A', u'B', u'A']),
                     mixed_dup=mixed_dup_panel)

    # Categoricals with 7, 1000 and 10000 distinct values.
    # NOTE(review): the key names suggest the sizes were chosen to give
    # int8/int16/int32 category codes -- confirm.
    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # Below 0.19.2 the Timestamp frequency is passed as `offset`; from
    # 0.19.2 on it is passed as `freq`.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One instance per date-offset class.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The sparse samples come from the _create_sp_* helpers, which are
    # defined outside this function.
    return dict(series=series,
                frame=frame,
                panel=panel,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Example #24
0
def generate_and_update_futures_data_file_4tickerhead(**kwargs):
    """Rebuild or extend the pickled futures price file for one ticker head.

    Prices are fetched through ``gfp.get_futures_price_4ticker``, rows flagged
    in ``dirty_data_points`` are removed, every contract is re-indexed onto a
    business-day grid, forward/backward close-price differences are added and
    the result is written to
    ``presaved_futures_data_folder/<ticker_head>.pkl``.  When that file
    already exists, only a recent window is fetched and merged into the
    stored data.

    Keyword Args:
        ticker_head: Ticker head whose file is generated (required).
        con: Optional open database connection.  When it is not supplied the
            connection obtained here is closed before returning.
        Any other keyword is forwarded to ``msu.get_my_sql_connection``.

    Returns:
        None.  The output is the pickle file written to disk.
    """
    ticker_head = kwargs['ticker_head']
    file_name = presaved_futures_data_folder + '/' + ticker_head + '.pkl'

    con = msu.get_my_sql_connection(**kwargs)

    try:
        # Check for the presaved file exactly once so that the "fetch" and
        # "merge" phases below can never disagree about whether old data
        # was loaded.
        if os.path.isfile(file_name):
            old_data = pd.read_pickle(file_name)
            last_available_date = int(
                old_data['settle_date'].max().to_pydatetime().strftime(
                    '%Y%m%d'))
            # NOTE(review): presumably steps 60 days back from the last
            # stored date so the refetched window overlaps -- confirm
            # against cu.doubledate_shift.
            date_from = cu.doubledate_shift(last_available_date, 60)
            data4_tickerhead = gfp.get_futures_price_4ticker(
                ticker_head=ticker_head, date_from=date_from, con=con)
        else:
            old_data = None
            data4_tickerhead = gfp.get_futures_price_4ticker(
                ticker_head=ticker_head, con=con)

        # Left-merge the known bad points; rows without a match get NaN in
        # 'discard', and NaN != True, so only explicitly flagged rows go.
        data4_tickerhead = pd.merge(data4_tickerhead,
                                    dirty_data_points,
                                    on=['settle_date', 'ticker'],
                                    how='left')
        data4_tickerhead = data4_tickerhead[
            data4_tickerhead['discard'] != True]
        # Keyword form: the positional axis argument is rejected by
        # pandas >= 2.0.
        data4_tickerhead = data4_tickerhead.drop(columns='discard')

        # Coerce the price columns to float, mapping None to NaN.
        for price_column in ('close_price', 'open_price', 'high_price',
                             'low_price'):
            data4_tickerhead[price_column] = [
                float(x) if x is not None else float('NaN')
                for x in data4_tickerhead[price_column].values
            ]

        data4_tickerhead['cont_indx'] = 100 * data4_tickerhead[
            'ticker_year'] + data4_tickerhead['ticker_month']
        unique_cont_indx_list = np.sort(
            data4_tickerhead['cont_indx'].unique())

        # The business-day grid always uses the 'CL' calendar, whatever the
        # ticker head is.
        bday_us = CustomBusinessDay(
            calendar=exp.get_calendar_4ticker_head('CL'))
        full_dates = pd.date_range(
            start=data4_tickerhead['settle_date'].min(),
            end=data4_tickerhead['settle_date'].max(),
            freq=bday_us)

        merged_dataframe_list = []

        for cont_indx in unique_cont_indx_list:

            contract_data = data4_tickerhead[data4_tickerhead['cont_indx'] ==
                                             cont_indx]

            # Restrict the grid to the life span of this contract and
            # left-merge so that missing days show up as NaN rows.
            contract_full_dates = full_dates[
                (full_dates >= contract_data['settle_date'].min())
                & (full_dates <= contract_data['settle_date'].max())]
            full_date_frame = pd.DataFrame(contract_full_dates,
                                           columns=['settle_date'])
            merged = pd.merge(full_date_frame,
                              contract_data,
                              on='settle_date',
                              how='left')

            # Identifier columns are constant within a contract; broadcast
            # the first value so the gap rows are filled in as well.
            for id_column in ('ticker', 'ticker_head', 'ticker_month',
                              'ticker_year', 'cont_indx'):
                merged[id_column] = contract_data[id_column].iloc[0]

            close = merged['close_price']

            # Forward-looking changes are measured from the next row's
            # close (shift(-1)); 'change1_instant' is measured from the
            # current row's close.
            merged['change1'] = close.shift(-2) - close.shift(-1)
            merged['change1_instant'] = close.shift(-1) - close
            merged['high1_instant'] = merged['high_price'].shift(-1)
            merged['low1_instant'] = merged['low_price'].shift(-1)
            merged['change2'] = close.shift(-3) - close.shift(-1)
            merged['change5'] = close.shift(-6) - close.shift(-1)
            merged['change10'] = close.shift(-11) - close.shift(-1)
            merged['change20'] = close.shift(-21) - close.shift(-1)
            # Backward-looking changes.
            merged['change_5'] = close - close.shift(5)
            merged['change_1'] = close - close.shift(1)

            merged_dataframe_list.append(merged)

        data4_tickerhead = pd.concat(merged_dataframe_list)

        if old_data is not None:
            # past_indx marks rows whose 5-row backward change is finite;
            # frame_indx marks freshly fetched rows.  Sorting both in
            # descending order makes drop_duplicates(keep='first') prefer
            # rows with a finite 'change_5', and fresh rows on ties.
            data4_tickerhead['past_indx'] = [
                1 if np.isfinite(x) else 0
                for x in data4_tickerhead['change_5'].values
            ]
            data4_tickerhead['frame_indx'] = 1

            # Capture the stored column layout before adding helper columns.
            data_columns = old_data.columns
            old_data['frame_indx'] = 0
            old_data['past_indx'] = [
                1 if np.isfinite(x) else 0
                for x in old_data['change_5'].values
            ]

            merged_data = pd.concat([old_data, data4_tickerhead],
                                    ignore_index=True,
                                    sort=True)
            merged_data = merged_data.sort_values(
                ['cont_indx', 'settle_date', 'past_indx', 'frame_indx'],
                ascending=[True, True, False, False])
            merged_data = merged_data.drop_duplicates(
                subset=['settle_date', 'cont_indx'], keep='first')
            merged_data = merged_data.drop(
                columns=['frame_indx', 'past_indx'])
            data4_tickerhead = merged_data[data_columns]

        data4_tickerhead.to_pickle(file_name)
    finally:
        # Only close a connection opened here; a caller-supplied one stays
        # open.  The finally clause also covers the failure paths.
        if 'con' not in kwargs:
            con.close()
Example #25
0
def update_options_price_database_from_cme_files_4ticker(**kwargs):
    """Load one ticker's CME option settlements into ``daily_option_price``.

    The settlement frame produced by ``pco.process_cme_options_4ticker`` is
    turned into one row per option and written with a ``REPLACE INTO``
    statement.  Nothing is written when the settle date is not a business
    day for the ticker head or when processing reports failure.

    Keyword Args:
        ticker: Full ticker symbol (required).
        settle_date: Settlement date as a YYYYMMDD integer; defaults to
            today's date.  It is also forwarded as ``report_date``.
        expiration_date: Option expiration as a ``date``; looked up with
            ``exp.get_options_expiration`` when omitted.
        cal_dte: Calendar days to expiration; computed when omitted.
        tr_dte: Trading days to expiration; computed when omitted.
        con: Optional open database connection.  When it is not supplied the
            connection obtained here is closed before returning.

    Returns:
        None.
    """
    ticker = kwargs['ticker']
    print(ticker)
    contract_specs_output = cmi.get_contract_specs(ticker)
    ticker_head = contract_specs_output['ticker_head']
    ticker_month_num = contract_specs_output['ticker_month_num']
    ticker_year = contract_specs_output['ticker_year']

    # Default the settle date to today and mirror it into report_date, since
    # kwargs is forwarded to the processing step below.
    if 'settle_date' not in kwargs:
        kwargs['settle_date'] = int(time.strftime('%Y%m%d'))
    settle_date = kwargs['settle_date']
    kwargs['report_date'] = settle_date

    if not exp.is_business_day(double_date=settle_date,
                               reference_tickerhead=ticker_head):
        return

    if 'expiration_date' in kwargs:
        expiration_date = kwargs['expiration_date']
    else:
        expiration_date = exp.get_options_expiration(ticker).date()

    settle_datetime = cu.convert_doubledate_2datetime(settle_date)
    price_date = settle_datetime.date()

    if 'cal_dte' in kwargs:
        cal_dte = kwargs['cal_dte']
    else:
        cal_dte = (expiration_date - price_date).days

    if 'tr_dte' in kwargs:
        tr_dte = kwargs['tr_dte']
    else:
        bday_us = CustomBusinessDay(
            calendar=exp.get_calendar_4ticker_head(ticker_head))
        dts = pd.date_range(start=settle_datetime,
                            end=expiration_date,
                            freq=bday_us)
        # Count business days strictly before expiration.
        tr_dte = sum(1 for x in dts
                     if x.to_pydatetime().date() < expiration_date)

    data_vendor_id = 2
    now = dt.datetime.now()
    con = msu.get_my_sql_connection(**kwargs)

    try:
        process_output = pco.process_cme_options_4ticker(**kwargs)

        if not process_output['success']:
            return

        settle_frame = process_output['settle_frame']
        column_names = settle_frame.columns.tolist()

        option_type_indx = column_names.index('option_type')
        strike_indx = column_names.index('strike')
        settle_indx = column_names.index('settle')
        volume_indx = column_names.index('volume')
        interest_indx = column_names.index('interest')

        def nan_to_none(value):
            # NaN is replaced by None before the row is handed to the
            # database wrapper.
            return None if np.isnan(value) else value

        tuples = [
            (data_vendor_id, ticker_head, ticker_month_num, ticker_year,
             ticker, x[option_type_indx], x[strike_indx], price_date,
             cal_dte, tr_dte, now, now,
             nan_to_none(x[settle_indx]),
             nan_to_none(x[volume_indx]),
             nan_to_none(x[interest_indx]))
            for x in settle_frame.values
        ]

        column_str = "data_vendor_id, ticker_head, ticker_month, ticker_year, ticker, " \
                     " option_type, strike, price_date, cal_dte, tr_dte, " \
                     " created_date,last_updated_date, close_price, volume, open_interest"

        # One "%s" placeholder per column; values are bound by the wrapper.
        insert_str = ", ".join(["%s"] * len(column_str.split(',')))
        final_str = "REPLACE INTO daily_option_price (%s) VALUES (%s)" % (
            column_str, insert_str)
        msu.sql_execute_many_wrapper(final_str=final_str,
                                     tuples=tuples,
                                     con=con)
    finally:
        # Only close a connection opened here; the finally clause covers
        # the early return as well as any exception.
        if 'con' not in kwargs:
            con.close()
Example #26
0
        Holiday("Independence Day",
                month=7,
                day=15,
                observance=nearest_workday),
        Holiday("Veterans Day", month=8, day=14, observance=nearest_workday),
        Holiday("New Years Day", month=8, day=15, observance=nearest_workday),
        Holiday("New Years Day", month=8, day=16, observance=nearest_workday),
        Holiday("'体育の日", month=10, day=14, observance=nearest_workday),
        Holiday("New Years Day", month=12, day=30, observance=nearest_workday),
        Holiday("Black Friday", month=12, day=31, observance=nearest_workday),
    ]


# umc-workingday #
# Business-day offset driven by the holiday rules of myCalendar.
cal = CustomBusinessDay(calendar=myCalendar())
# Every day of 2019 that the custom offset treats as a business day.
s = pd.date_range('2019-01-01', '2019-12-31', freq=cal)
# Render the dates as 'YYYY-MM-DD' strings.
sumc = s.strftime('%Y-%m-%d')
# Extra date added by hand (2019-12-28 falls on a Saturday).
s1 = ['2019-12-28']
# Single-column frame ('umc') holding the generated dates.
df = pd.DataFrame(sumc, columns=['umc'])
# Single-column frame ('umc') holding the hand-added date.
df1 = pd.DataFrame(s1, columns=['umc'])
# Stack both frames; the extra date ends up last, so the result is not
# re-sorted chronologically.
data = pd.concat([df, df1], ignore_index=True)
# Expose the dates as a plain list under the 'umc-workingday' key.
umcdata = {'umc-workingday': data["umc"].tolist()}
    'Mayor William J. Gaynor Funeral 12pm late open Sept 22, 1913',
    month=9,
    day=22,
    start_date=Timestamp('1913-09-22'),
    end_date=Timestamp('1913-09-22'),
)

# 1914

# Reopened for trading bonds (with restrictions) Nov 27, 1914
# Reopened for trading stocks (with restrictions) Dec 12, 1914
# Restrictions remained in place until April 1, 1915
# Every Monday-Saturday date from 1914-07-31 through 1914-12-11 (the day
# before stock trading reopened), as UTC timestamps.
OnsetOfWWI1914 = date_range(
    '1914-07-31',
    '1914-12-11',
    freq=CustomBusinessDay(weekmask='Mon Tue Wed Thu Fri Sat'),
    tz='UTC')

# 1917
# One-off dates, each stored as a single-element list of UTC timestamps.
DraftRegistrationDay1917 = [Timestamp('1917-06-05', tz='UTC')]

WeatherHeatClosing1917 = [Timestamp('1917-08-04', tz='UTC')]

# Holiday rule limited to the single day 1917-08-29 by matching
# start_date and end_date.
ParadeOfNationalGuardEarlyClose1917 = Holiday(
    'Parade of National Guard 12pm Early Close Aug 29, 1917',
    month=8,
    day=29,
    start_date=Timestamp('1917-08-29'),
    end_date=Timestamp('1917-08-29'),
)
Example #28
0
 def day(self):
     """
     Return the business-day offset for this exchange.

     The weekmask lists all seven weekdays, so no day of the week is
     excluded.
     """
     return CustomBusinessDay(weekmask="Mon Tue Wed Thu Fri Sat Sun")
Example #29
0
# Set the title
# ax1.set_title("AAPL Adjusted opening price")

# Other customizations
# ax1.set_ylim(0, 200)
# ax1.set_yticks([0, 50, 100, 150, 200])

# Other plots

# ax2.plot(aapl[["Adj. Low", "Adj. High"]])
# ax3.plot(aapl[["Low", "High"]])

# Data Cleaning

# Business-day offset that takes its holidays from the US federal holiday
# calendar.
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())


def neigbor_dates(date, nbefore=3, nafter=3):
    """Return the dates surrounding *date*, stepped in ``bday_us`` offsets.

    Args:
        date: Anything ``pd.to_datetime`` accepts.
        nbefore: Number of offsets to step backwards.
        nafter: Number of offsets to step forwards.

    Returns:
        A list of ``nbefore + nafter + 1`` values, ordered from the earliest
        step (``-nbefore``) to the latest (``+nafter``); the zero step is
        included.
    """
    anchor = pd.to_datetime(date)

    neighbours = []
    for step in range(-nbefore, nafter + 1):
        neighbours.append(anchor + step * bday_us)
    return neighbours


dates = []

for ann_date in announcement_dates.index:
Example #30
0
 def __init__(self):
     """Set up the business-day offset and the close/open delta."""
     # Business-day offset built from this object's calendar and weekmask.
     self.cbd = CustomBusinessDay(calendar=self.calendar,
                                  weekmask=self.weekmask)
     # NOTE(review): timedelta_between is defined elsewhere; presumably the
     # span between close_time and open_time -- confirm sign and units.
     self.delta = timedelta_between(self.close_time, self.open_time)