Python USFederalHolidayCalendar Exemples, pandas.tseries.holiday.USFederalHolidayCalendar Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : Airline Delays - Data Processing Pipeline.py Projet : stevendleung/Predicting_Airline_Delays

def holiday_column(airline_df, start='2014-01-01', end='2018-12-31'):
    '''Takes airline df and returns df with column indicating if
  date of flight is a US Federal Holiday'''

    #Pull Holiday Dates and convert to Spark DF with timestamp column
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start, end).to_pydatetime()
    holidays_df = pd.DataFrame(pd.DataFrame(holidays)[0].astype('string'))
    schema = StructType([StructField('Holiday_Date', StringType())])
    holidays_sc = spark.createDataFrame(holidays_df, schema)
    holidays_sc = holidays_sc.select(
        to_timestamp(holidays_sc.Holiday_Date,
                     'yyyy-MM-dd').alias('holiday_date'))

    #Join holidays to airlines
    holiday_joined_df = airline_df.join(
        holidays_sc, (airline_df.fl_date == holidays_sc.holiday_date), 'left')

    #Change date column to binary
    holiday_joined_df = holiday_joined_df.withColumn(
        "Holiday", (f.col('holiday_date').isNotNull()).cast("integer"))

    #Drop redundant holiday_date column
    holiday_joined_df = holiday_joined_df.drop(holiday_joined_df.holiday_date)

    return holiday_joined_df

Exemple #2

0

Afficher le fichier

    def __init__(
            self,
            path: str = None,
            data: pd.DataFrame = pd.DataFrame(),
            holdings: pd.DataFrame = pd.DataFrame(),
    ):
        """Initialize the portfolio with holdings and market data.

        Args:
            path: The name of an a directory containing holdings and market data. If this
                is None then the portfolio must be described by the data and holdings arguments.
            data: A DataFrame  containing market close data for a set of
                financial instruments over a period of time.
            holdings:   A DataFrame containing the number of shares held of the set of
                symbols in data over a time period corresponding to that of data.
        """
        if len(data) > 0 and len(holdings) > 0:
            logger.debug("Data and holdings arguments are set.")
            self.data = data
            self.holdings = holdings
        else:
            logger.debug("Data and holdings are not set.")
        if path is None:
            path = Path().home() / ".portfolio" / "data"
        self.path = Path(path)
        if not self.path.is_dir():
            logger.info("%s is not a directory, creating it...", self.path)
            self.path.mkdir()
        self.data_file = self.path / "prices.feather"
        self.holdings_file = self.path / "holdings.feather"
        try:
            # The feather format does not support date indices,
            # so set the Date colemn to be the index.
            self.holdings = pd.read_feather(
                self.holdings_file).set_index("date")
        except FileNotFoundError:
            logger.info("No stored holdings found.")
            self.holdings = holdings
        symbols = self.holdings.columns
        try:
            # The feather format does not support date indices,
            # so set the Date colemn to be the index.
            self.data = pd.read_feather(self.data_file).set_index("date")
        except FileNotFoundError:
            logger.info("Market data is not stored .")
        calendar = USFederalHolidayCalendar()
        today = pd.Timestamp.today().normalize()
        last_business_day = min(today - BDay(1),
                                self.data.index.max() + BDay(1))
        holidays = calendar.holidays(last_business_day, today)
        if (today not in self.data.index
                and last_business_day not in self.data.index
                and holidays.empty):
            self.data = self.get_market_data(
                symbols,
                start=last_business_day,
                end=today,
            )
            self.holdings = self.holdings.reindex(self.data.index,
                                                  method="ffill").dropna()

Exemple #3

0

Afficher le fichier

def pandas_holidays():
    from pandas.tseries.holiday import USFederalHolidayCalendar
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start='2014-01-01',
                            end='2014-12-31').to_pydatetime()
    if datetime.datetime(2014, 1, 1) in holidays:
        print(True)

Exemple #4

0

Afficher le fichier

Fichier : kernels.py Projet : afcarl/gplib

 def calc_holidays(start_date=datetime.datetime(2012, 10, 1),
                   end_date=datetime.datetime.now()):
     cal = USFederalHolidayCalendar()
     holidays = np.ones(cal.holidays().shape[0])
     holidays = pd.Series(holidays, index=cal.holidays()).resample("D").fillna(0.0)
     h = holidays[start_date:end_date].values
     h = np.where(h)[0] - 1
     return h

Exemple #5

0

Afficher le fichier

Fichier : extract.py Projet : chnhgn/Machine-Learning

 def near_holiday(self):
     calendar = USFederalHolidayCalendar()
     holidays = calendar.holidays()
     df1 = self.df_raw[['id', 'date_account_created']]
     df1.date_account_created = pd.to_datetime(df1.date_account_created)
     df1.date_account_created = pd.to_datetime(df1.date_account_created.dt.date)
     df1['near_holiday'] = (df1.date_account_created.isin(holidays + timedelta(days=1)) | df1.date_account_created.isin(holidays - timedelta(days=1)))
     return df1[['id', 'near_holiday']]

Exemple #6

0

Afficher le fichier

def get_holiday_list(start_yr, start_mon, start_day, end_yr, end_mon, end_day):
    calendar = USFederalHolidayCalendar()
    holiday_list = calendar.holidays(datetime.datetime(start_yr, 11, 1),
                                     datetime.datetime(end_yr, 5, 1))
    holidays = []
    for holiday in holiday_list:
        holidays.append(holiday)
    return holidays

Exemple #7

0

Afficher le fichier

Fichier : getFeatures.py Projet : w33ab00/IRDM2016

def isHoliday(d):
    cal = USFederalHolidayCalendar()
    y_str = d.strftime("%Y")
    holidays = cal.holidays(start=y_str+'-01-01', end=y_str+'-12-31').to_pydatetime()
    if d in holidays:
        return 1
    else:
        return 0

Exemple #8

0

Afficher le fichier

def holiday_lists():
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start='2021-01-01',
                            end='2021-12-31').to_pydatetime()
    hs = []
    for h in holidays:
        hs.append(h.strftime("%Y/%m/%d"))
    return hs

Exemple #9

0

Afficher le fichier

def next_business_day(date):
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays()
    date = date + BDay(1)
    date = date.to_pydatetime()
    while date in holidays:
        date = date + BDay(1)
        date = date.to_pydatetime()
    return date

Exemple #10

0

Afficher le fichier

Fichier : mianPD.py Projet : ycc1107/pandasStock

 def __init__(self,epsPath,pricePath,daysNeed = 3):
     self.raw_eps = pd.read_csv(epsPath)
     self.price = pd.read_csv(pricePath)
     self.daysNeed = daysNeed
     self.__getEps()
     minDate = str(min(self.epsBefore['DATE'],self.epsAfter['DATE']))
     maxDate = str(max(self.epsBefore['DATE'],self.epsAfter['DATE']))
     cal = USFederalHolidayCalendar()
     self.holidays = cal.holidays(start=minDate, end=maxDate).to_pydatetime()

Exemple #11

0

Afficher le fichier

Fichier : Fetch_Data_Stock_US_Weekly.py Projet : zhangyunGit/StockRecommendSystem

def judgeOpenDaysInRange(from_date, to_date):
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(from_date, to_date)
    duedays = pd.bdate_range(from_date, to_date)
    df = pd.DataFrame()
    df['date'] = duedays
    df['holiday'] = duedays.isin(holidays)
    opendays = df[df['holiday'] == False]
    return opendays

Exemple #12

0

Afficher le fichier

def create_ts_calendar(df, ts_settings, additional_events=None):
    """
    df: pandas df
    ts_settings: dict
        Parameters for time series project
    additional_events: pandas df(optional)
        df of additional events to add to calendar

    Returns:
    --------
    Calendar of events

    """
    date_col = ts_settings['date_col']

    cal = USFederalHolidayCalendar(AbstractHolidayCalendar)

    black_friday = Holiday(
        "Black Friday",
        month=11,
        day=1,
        offset=[pd.DateOffset(weekday=TH(4)),
                pd.DateOffset(1)])
    cyber_monday = Holiday(
        "Cyber Monday",
        month=11,
        day=1,
        offset=[pd.DateOffset(weekday=TH(4)),
                pd.DateOffset(4)])
    cal.rules.append(black_friday)
    cal.rules.append(cyber_monday)

    cal.rules[9] = Holiday('Christmas', month=12, day=25)
    cal.rules[4] = Holiday('July 4th', month=7, day=4)

    events = pd.DataFrame(
        cal.holidays(
            start=pd.to_datetime(df[date_col].min()),
            end=pd.to_datetime(df[date_col].max()) + dt.timedelta(days=365),
            return_name=True,
        ))
    events = events.reset_index()
    events.columns = ['Date', 'Event']

    if additional_events is not None:
        assert additional_events.shape[
            1] == 2, 'additional events must be a df with 2 columns'
        additional_events.columns = ['Date', 'Event']
        additional_events['Date'] = pd.to_datetime(additional_events['Date'])
        events = events.append(additional_events)

    events['Date'] = [
        dt.datetime.strftime(pd.to_datetime(date), '%Y-%m-%d')
        for date in events['Date']
    ]
    return events.drop_duplicates().sort_values(by='Date').reset_index(
        drop=True)

Exemple #13

0

Afficher le fichier

Fichier : stocks.py Projet : pdghawk/systrade

    def get_path(self,
                 initial_values,
                 t_start,
                 t_end,
                 freq,
                 interest_rate,
                 seed=None):
        # check initial value os the right size and dimension
        #print("getting path " , freq.)
        # set up parameters of the pathway model
        covar = stats.corr_to_cov(self.correlation_matrix, self.sigmas)
        covar_param = param.SimpleArrayParam(covar)

        chol = linalg.cholesky(covar, lower=True)
        chol[chol < 1.0e-9] = 0.0
        cholesky_param = param.SimpleArrayParam(chol)

        r_param = param.SimpleParam(interest_rate)

        # create the index of times that are in market hours between
        # requested times
        timeindex = pd.date_range(start=t_start, end=t_end, freq=freq)
        # get the frequency (in seconds) now before removing non-market times
        freq_in_secs = pd.to_timedelta(timeindex.freq,
                                       unit='s').total_seconds()
        # only trading hours
        timeindex = timeindex[timeindex.indexer_between_time('09:30', '16:00')]
        # only weekdays
        timeindex = timeindex[~(timeindex.dayofweek > 4)]
        # remove fed holidays
        cal = USFederalHolidayCalendar()
        hols = cal.holidays(start=timeindex.min(), end=timeindex.max())
        timeindex = timeindex[~timeindex.isin(hols)]

        # get array of time in yearly units and get stock pathways
        times = np.arange(
            0, len(timeindex)) * freq_in_secs / MARKET_SECONDS_PER_YEAR

        # seed and create the pathway generator object
        np.random.seed(seed=seed)
        path_maker = path.GeometricDiffusionManyAsset(self.generator, r_param,
                                                      covar_param,
                                                      cholesky_param)

        if len(times) > 0:
            s_paths = path_maker.get_single_timed_path(initial_values, times)
        else:
            raise RunTimeError('Trying to generate stocks on empty time list')

        # put all data into a pandas Dataframe
        stocks_df = pd.DataFrame(index=timeindex,
                                 data=s_paths,
                                 columns=self.stock_names)
        print("internal : ",
              stocks_df.groupby(stocks_df.index.dayofweek).sum())
        np.random.seed(seed=None)
        return stocks_df

Exemple #14

0

Afficher le fichier

Fichier : mianPD.py Projet : ycc1107/pandasStock

 def __init__(self, epsPath, pricePath, daysNeed=3):
     self.raw_eps = pd.read_csv(epsPath)
     self.price = pd.read_csv(pricePath)
     self.daysNeed = daysNeed
     self.__getEps()
     minDate = str(min(self.epsBefore['DATE'], self.epsAfter['DATE']))
     maxDate = str(max(self.epsBefore['DATE'], self.epsAfter['DATE']))
     cal = USFederalHolidayCalendar()
     self.holidays = cal.holidays(start=minDate,
                                  end=maxDate).to_pydatetime()

Exemple #15

0

Afficher le fichier

Fichier : TimeSeries.py Projet : HuangYiran/data_mining

def create_date_feature_is_public_holiday(df, date, start, end, country = 'US'):
    """
    create date feature: is holiday
    only support USFederal Holiday
    """
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start=start, end=end).to_pydatetime()
    foo = lambda x: 1 if x in holidays else 0
    df['is_holiday'] = df[date].map(foo)
    return df

Exemple #16

0

Afficher le fichier

Fichier : date_utils.py Projet : theJollySin/ESTA

def find_holidays(base_year):
    ''' Using Pandas calendar, find all 10 US Federal Holidays,
        plus California's Cesar Chavez Day (March 31).
    '''
    yr = str(base_year)
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start=yr + '-01-01',
                            end=yr + '-12-31').to_pydatetime()

    return [d.strftime('%m-%d') for d in holidays] + ['03-31']

Exemple #17

0

Afficher le fichier

Fichier : generateCsvFiltered.py Projet : jeanlks/ARP

def getHolidays(dates):
    holidaysVectorReturn  = []
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start='2015-01-01', end='2017-12-31').to_pydatetime()
    for idx, date in enumerate(dates):
         if date in holidays:
             holidaysVectorReturn.append(1)
         else:
             holidaysVectorReturn.append(0)
    return  holidaysVectorReturn

Exemple #18

0

Afficher le fichier

    def __init__(self, config_name):
        self.today = datetime.datetime.now().date()
        self._holidays = USFederalHolidayCalendar()
        self.config_name = config_name

        self._set_config()
        self.data = self._pull_data()
        self._clean_data()
        if len(self.data) != 0:
            self._amend_data()
        self._store_data()

Exemple #19

0

Afficher le fichier

Fichier : test_holiday.py Projet : israelzuniga/pandas

    def test_calendar(self):

        calendar = USFederalHolidayCalendar()
        holidays = calendar.holidays(self.start_date, self.end_date)

        holidays_1 = calendar.holidays(self.start_date.strftime("%Y-%m-%d"), self.end_date.strftime("%Y-%m-%d"))
        holidays_2 = calendar.holidays(Timestamp(self.start_date), Timestamp(self.end_date))

        self.assertEqual(list(holidays.to_pydatetime()), self.holiday_list)
        self.assertEqual(list(holidays_1.to_pydatetime()), self.holiday_list)
        self.assertEqual(list(holidays_2.to_pydatetime()), self.holiday_list)

Exemple #20

0

Afficher le fichier

Fichier : stack.py Projet : happyphonon/Airbnb

def build_features(data, features):
    #Timestamp First Active
    data['timestamp_first_active'] = data['timestamp_first_active'].astype(str)
    data['timestamp_first_active_date'] = data['timestamp_first_active'].str[:8]
    data['timestamp_first_active_date'] = pd.to_datetime(data['timestamp_first_active_date'], format='%Y%m%d')
    data['tfa_month'] = data['timestamp_first_active_date'].map(lambda x : x.month)
    data['tfa_year'] = data['timestamp_first_active_date'].map(lambda x : x.year)
    data['tfa_day'] = data['timestamp_first_active_date'].map(lambda x : x.day)
    data['tfa_dayofyear'] = data.timestamp_first_active_date.dt.dayofyear
    data['tfa_dayofweek'] = data.timestamp_first_active_date.dt.dayofweek
    data['tfa_week'] = data.timestamp_first_active_date.dt.week
    data['tfa_quarter'] = data.timestamp_first_active_date.dt.quarter
    features.extend(['tfa_day','tfa_month','tfa_year','tfa_dayofyear','tfa_dayofweek','tfa_week','tfa_quarter'])
    #TFA Holidays
    #calendar = USFederalHolidayCalendar()
    #tfa_holidays = calendar.holidays(start=data['timestamp_first_active_date'].min(),end=data['timestamp_first_active_date'].max())
    #for i in range(len(tfa_holidays)):
        #data['tfa_holiday_diff_'+str(i)] = data['timestamp_first_active_date'].map(lambda x : (x-tfa_holidays[i]).days)
        #data['tfa_holiday_diff_'+str(i)] = data['tfa_holiday_diff_'+str(i)].map(holiday_transform)
        #data_dummy = pd.get_dummies(data['tfa_holiday_diff_'+str(i)],prefix='tfa_holiday_diff_'+str(i))
        #features.extend(data_dummy.columns.values)
        #data.drop(['tfa_holiday_diff_'+str(i)],axis=1,inplace=True)
        #data = pd.concat((data,data_dummy),axis=1)
        #features.extend('tfa_holiday_diff_'+str(i))
    #Date Account Created
    data['date_account_created'] = pd.to_datetime(data['date_account_created'])
    data['dac_month'] = data['date_account_created'].map(lambda x : x.month)
    data['dac_year'] = data['date_account_created'].map(lambda x : x.year)
    data['dac_day'] = data['date_account_created'].map(lambda x : x.day)
    data['dac_dayofyear'] = data.date_account_created.dt.dayofyear
    data['dac_dayofweek'] = data.date_account_created.dt.dayofweek
    data['dac_week'] = data.date_account_created.dt.week
    data['dac_quarter'] = data.date_account_created.dt.quarter
    features.extend(['dac_year','dac_month','dac_day','dac_dayofyear','dac_dayofweek','dac_week','dac_quarter'])
    #DAC Holidays
    calendar = USFederalHolidayCalendar()
    dac_holidays = calendar.holidays(start=data['date_account_created'].min(),end=data['date_account_created'].max())
    for i in range(len(dac_holidays)):
        data['dac_holiday_diff_'+str(i)] = data['date_account_created'].map(lambda x : (x-dac_holidays[i]).days)
        data['dac_holiday_diff_'+str(i)] = data['dac_holiday_diff_'+str(i)].map(holiday_transform)
        features.extend(['dac_holiday_diff_'+str(i)])
    #Days Difference Between TFA and DAC
    data['days_diff'] = (data['date_account_created'] - data['timestamp_first_active_date']).dt.days
    #data['days_diff'] = data['days_diff'].map(holiday_transform)
    features.extend(['days_diff'])
    data.drop(['date_account_created','timestamp_first_active','timestamp_first_active_date'],axis=1,inplace=True)
    other_features = ['gender', 'signup_method','signup_flow','language','affiliate_channel', 'affiliate_provider', 'first_affiliate_tracked', 'signup_app', 'first_device_type', 'first_browser']
    for f in other_features:
        data_dummy = pd.get_dummies(data[f],prefix=f)
        features.extend(data_dummy.columns.values)
        data.drop([f],axis=1,inplace=True)
        data = pd.concat((data,data_dummy),axis=1)
    return data

Exemple #21

0

Afficher le fichier

    def __init__(self, data_df, date):
        # df = pd.read_csv(path, date_parser='Date')
        self.date = date
        df = data_df.copy()
        # df = pd.read_csv(path, date_parser='Date')
        # df = data_df.copy()
        test = df[[
            'Date', 'Hour', 'Weekday', 'Month', 'Load', 'Mean_Temp',
            'Mean_Humi', 'RIV_Temp', 'RIV_Humi', 'LAX_Temp', 'LAX_Humi',
            'USC_Temp', 'USC_Humi', 'WJF_Temp', 'WJF_Humi', 'TRM_Temp',
            'TRM_Humi'
        ]]

        test.loc[:, 'RIV_Temp_Log'] = np.log(df['RIV_Temp'])

        test.loc[:, 'Load_Log'] = np.log(df['Load'])
        test['Load_Lag_48'] = test['Load_Log'].shift(48, axis=0)
        # test['Temp_Lag_48'] = test['Mean_Temp'].shift(48, axis=0)
        test['Humi_Lag_48'] = test['Mean_Humi'].shift(48, axis=0)
        test['RIV_Temp_Log_Lag_48'] = test['RIV_Temp_Log'].shift(48, axis=0)

        # test['RIV_Temp_Lag_48']= test['RIV_Temp'].shift(48, axis=0)

        cal = USFederalHolidayCalendar()

        holidays = cal.holidays(start='2014-01-01',
                                end=str(datetime.datetime.now()),
                                return_name=True)

        holidays = pd.DataFrame(holidays)

        holidays = holidays.reset_index()
        holidays = holidays.rename(columns={'index': "Date", 0: 'Holiday'})
        holidays['Date'] = pd.to_datetime(holidays['Date'])

        test['Date'] = pd.to_datetime(test['Date'])
        lm_data = test.loc[49:len(test), ].merge(holidays,
                                                 how='left',
                                                 on='Date')
        lm_data['Holiday'] = lm_data['Holiday'].fillna("Not Holiday")

        lm_data[["Hour", "Weekday", "Month",
                 "Holiday"]] = lm_data[["Hour", "Weekday", "Month",
                                        "Holiday"]].astype('category')

        DateTime = pd.DataFrame(
            lm_data.apply(lambda line: pd.to_datetime(line['Date']) + datetime.
                          timedelta(hours=line['Hour']),
                          axis=1))
        DateTime.columns = ['DateTime']

        self.lm_data = pd.concat([DateTime, lm_data], axis=1)
        self.lm_data.set_index('DateTime', inplace=True)

Exemple #22

0

Afficher le fichier

Fichier : helper.py Projet : lynkeib/WebProjects

def DR_Temp_data_cleaning(dataframe):
    '''
        inplace change of the dataframe, for the structure purpose, return this dataframe
    '''
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    test = dataframe[[
        'Date', 'Hour', 'Weekday', 'Month', 'Load', 'Mean_Temp', 'Mean_Humi',
        'RIV_Temp', 'RIV_Humi', 'LAX_Temp', 'LAX_Humi', 'USC_Temp', 'USC_Humi',
        'WJF_Temp', 'WJF_Humi', 'TRM_Temp', 'TRM_Humi'
    ]]

    test.loc[:, 'RIV_Temp_Log'] = np.log(dataframe['RIV_Temp'])
    test.loc[:, 'LAX_Temp_Log'] = np.log(dataframe['LAX_Temp'])
    test.loc[:, 'USC_Temp_Log'] = np.log(dataframe['USC_Temp'])
    test.loc[:, 'WJF_Temp_Log'] = np.log(dataframe['WJF_Temp'])
    test.loc[:, 'TRM_Temp_Log'] = np.log(dataframe['TRM_Temp'])
    test.loc[:, 'Mean_Temp_Log'] = np.log(dataframe['TRM_Temp'])

    test.loc[:, 'Load_Log'] = np.log(dataframe['Load'])
    test['Load_Lag_48'] = test['Load_Log'].shift(48, axis=0)
    test['Humi_Lag_48'] = test['Mean_Humi'].shift(48, axis=0)

    test['RIV_Temp_Log_Lag_48'] = test['RIV_Temp_Log'].shift(48, axis=0)
    test['LAX_Temp_Log_Lag_48'] = test['LAX_Temp_Log'].shift(48, axis=0)
    test['USC_Temp_Log_Lag_48'] = test['USC_Temp_Log'].shift(48, axis=0)
    test['WJF_Temp_Log_Lag_48'] = test['WJF_Temp_Log'].shift(48, axis=0)
    test['TRM_Temp_Log_Lag_48'] = test['TRM_Temp_Log'].shift(48, axis=0)
    test['Mean_Temp_Log_Lag_48'] = test['Mean_Temp_Log'].shift(48, axis=0)

    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start='2014-01-01',
                            end=str(datetime.datetime.now()),
                            return_name=True)
    holidays = pd.DataFrame(holidays)
    holidays = holidays.reset_index()
    holidays = holidays.rename(columns={'index': "Date", 0: 'Holiday'})
    holidays['Date'] = pd.to_datetime(holidays['Date'])
    test['Date'] = pd.to_datetime(test['Date'])
    lm_data = test.loc[49:len(test), ].merge(holidays, how='left', on='Date')
    lm_data['Holiday'] = lm_data['Holiday'].fillna("Not Holiday")
    lm_data[["Hour", "Weekday", "Month",
             "Holiday"]] = lm_data[["Hour", "Weekday", "Month",
                                    "Holiday"]].astype('category')
    DateTime = pd.DataFrame(
        lm_data.apply(lambda line: pd.to_datetime(line['Date']) + datetime.
                      timedelta(hours=line['Hour']),
                      axis=1))
    DateTime.columns = ['DateTime']
    lm_data = pd.concat([DateTime, lm_data], axis=1)
    lm_data.set_index('DateTime', inplace=True)

    return lm_data

Exemple #23

0

Afficher le fichier

Fichier : model.py Projet : adelina-ie/MLOPS

def predict(dict):
    """
    This function pickles the
    model and generates the test
    features by passing the dict
    parameter.
    """

    folder_selected = folderSelect()
    filename = os.path.join(folder_selected, "model.joblib")
    if filename is None:
        train_and_persist()
    else:
        with open(filename, "rb") as f:
            model = load(f)
    testDf = pd.DataFrame(dict, index=[0])
    testDf["hr"] = testDf["date"].dt.hour
    testDf["yr"] = testDf["date"].dt.year - testDf["date"].dt.year.min()
    testDf["mnth"] = testDf["date"].dt.month
    testDf["season"] = testDf["date"].map(get_season)
    testDf["weekday"] = testDf["date"].dt.weekday
    testDf["dteday"] = testDf["date"].dt.day
    testDf["dteday"] = pd.to_datetime(testDf["dteday"])
    cal = USFederalHolidayCalendar()
    holidays = pd.to_datetime(cal.holidays(start="2011-01-01", end="2011-06-30"))
    testDf["holiday"] = pd.to_datetime(testDf["date"]).dt.date in holidays
    testDf["workingday"] = pd.to_datetime(testDf["date"]).dt.date not in holidays
    testDf["holiday"] = testDf["holiday"].map(lambda x: 1 if x else 0)
    testDf["workingday"] = testDf["workingday"].map(lambda x: 1 if x else 0)
    t_max , t_min = 50, -8
    # This is for tempretaure normalization
    testDf["temp"] = (testDf["temperature_C"] - t_min)/(t_max-t_min)
    # We divide humidity by 100 to scale it between 0 and 1
    testDf["hum"] = testDf["humidity"]/100
    testDf = testDf.drop(columns=["temperature_C", "humidity"])
    # Convert the data type to eithwe category or to float
    testDf = generating_new_features(testDf)
    testDf = pd.get_dummies(testDf)
    # Finally start with Machine Learning
    test = testDf.drop(columns=["date", "dteday", "feeling_temperature_C"])
    # savedir = Path.home()
    # filename = os.path.join(savedir, "model.joblib")
    # with open(filename, "rb") as f:
    #     model = load(f)
    # print("done!")
    # f.close()
    train_X, train_y, test_X, test_y = prepare_train_data()
    train_features = train_X.columns.values
    test = fill_missing_features(test, train_features)
    pred = model.predict(test)
    pred = pred.astype(int)
    print(" Rounded predictions:\n", pred)

Exemple #24

0

Afficher le fichier

Fichier : trend_finder.py Projet : cduggan1708/stocks

def getStartDate():
    # get 200 business days since yesterday but leave room in case this is run on weekend
    yesterday = date.today() - timedelta(days=1)
    start_date = yesterday - BDay(202)

    # find the number of holidays between yesterday and 200 days ago and subtract that to get real start date
    calendar = USFederalHolidayCalendar()
    holidays = calendar.holidays(start_date, yesterday)

    start_date = start_date - BDay(len(holidays.tolist()))

    s = pd.Series(start_date)
    return s.map(pd.Timestamp.date)[0]

Exemple #25

0

Afficher le fichier

Fichier : test_holiday.py Projet : xcompass/pandas

    def test_calendar(self):

        calendar = USFederalHolidayCalendar()
        holidays = calendar.holidays(self.start_date, self.end_date)

        holidays_1 = calendar.holidays(self.start_date.strftime('%Y-%m-%d'),
                                       self.end_date.strftime('%Y-%m-%d'))
        holidays_2 = calendar.holidays(Timestamp(self.start_date),
                                       Timestamp(self.end_date))

        assert list(holidays.to_pydatetime()) == self.holiday_list
        assert list(holidays_1.to_pydatetime()) == self.holiday_list
        assert list(holidays_2.to_pydatetime()) == self.holiday_list

Exemple #26

0

Afficher le fichier

def validate(date):

    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(date)  # check recent holidays

    adjusted = False

    if date in holidays:
        #create offset, where offset replace BDay as bussiness day offset to account for holiday
        offset = CustomBusinessDay(calendar=cal)
        date += offset
        adjusted = True

    return date, adjusted

Exemple #27

0

Afficher le fichier

Fichier : builder.py Projet : yihengli/chitaxi

 def assign_workdays(self, data, start, end, speed=False):
     cal = USFederalHolidayCalendar()
     holidays = cal.holidays(start=start, end=end)
     if speed:
         num_process = mp.cpu_count()
         chunk_size = int(data.shape[0] / num_process)
         chunks = [data.loc[data.index[i: i + chunk_size]]
                   for i in range(0, data.shape[0], chunk_size)]
         pools = mp.Pool(num_process)
         res = pools.map(
             partial(self._workday_process, holidays=holidays), chunks)
         return pd.concat(res)
     else:
         return self._workday_process(data, holidays)

Exemple #28

0

Afficher le fichier

def add_time_windows(df):
    """Adds time windows as target variable, based on delivery date and time.

    Calculates number of business days (excl weekends, shipment date, federal holidays) for delivery.
    Adds delivery time to number of business days to get time-in-transit.
    Creates window thresholds based on discussion with Jose.
    Lastly assign time windows to shipments based on time-in-transit.

    Args:
        df (pandas dataframe obj): Pandas dataframe that must contain shipment_date, delivery_date, delivery_time.

    Returns:
        pandas dataframe obj: Pandas dataframe with new columns days_in_transit, days_taken_float (time-in-transit),
            Y (target variable i.e. time window)

    """
    print("Adding time windows i.e. target variable...")
    print(f"Starting with {report_df_stats(df)}")
    start_time = time.time()
    # Calculate days in transit (exclude shipment date, holidays, weekends)
    start_date = df['shipment_date'].min()
    end_date = df['shipment_date'].max()
    calendar = USFederalHolidayCalendar()
    holidays = calendar.holidays(start_date, end_date).date.tolist()
    shipment_dates = [d.date() for d in df['shipment_date']]
    delivery_dates = [d.date() for d in df['delivery_date']]
    # -1 because we will add transit time
    df['days_in_transit'] = np.busday_count(
        shipment_dates, delivery_dates, holidays=holidays) - 1
    # Convert days in transit/delivery time to days taken (with decimals))
    # e.g. if parcel reaches at 12.00pm on 2nd business day, days taken is 1.5
    delivery_percentage_of_day = [
        (timedelta.total_seconds(d) / timedelta(days=1).total_seconds())
        for d in df['delivery_time']
    ]
    df['days_taken_float'] = df['days_in_transit'] + delivery_percentage_of_day
    # Keep rows from -1 to 5 days in transit. The rest are rare occurrences.
    max_days_to_keep = 5
    df = df[df['days_in_transit'].isin(np.arange(-1, max_days_to_keep))]

    # Assign time windows
    time_window_thresholds = create_time_window_thresholds()
    tqdm.pandas(desc="Assign time window")
    df['Y'] = df.progress_apply(lambda x: assign_time_window(
        x['days_taken_float'], time_window_thresholds),
                                axis=1)

    print(f"Ending with {report_df_stats(df)}.")
    utilities.print_elapsed_time(start_time)
    return df

Exemple #29

0

Afficher le fichier

Fichier : process.py Projet : transitmatters/mbta-covid-recovery-dash

def format_subway_data(path_to_csv_file: str):
    # read data, convert to datetime
    df = pd.read_csv(path_to_csv_file)
    df["servicedate"] = pd.to_datetime(df["servicedate"])

    # add holidays
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start=df["servicedate"].min(),
                            end=df["servicedate"].max())

    # mark as holiday and weekday
    df["holiday"] = df["servicedate"].dt.date.astype("datetime64").isin(
        holidays.date)
    df["weekday"] = df["servicedate"].dt.dayofweek

    # define peak, mark weekdays, convert service date back
    conditions = [(df["holiday"] == False) & (df["weekday"] < 5)]
    choices = ["peak"]
    df["peak"] = np.select(conditions, choices, default="offpeak")
    df["week"] = df["servicedate"].dt.isocalendar().week
    df["year"] = df["servicedate"].dt.isocalendar().year
    df["servicedate"] = df["servicedate"].dt.date.astype(str)

    # select date of the week
    dates = df[df["weekday"] == 0]
    dates = dates[["servicedate", "week", "year"]].drop_duplicates()

    # limit data to just peak, merge back dates
    final = df[df["peak"] == "peak"]
    final = final.groupby(["year", "week", "route_or_line"
                           ])["sum"].mean().round().reset_index()

    final = final.merge(dates, on=["week", "year"], how="left")

    # get list of bus routes
    routelist = list(set(final["route_or_line"].tolist()))

    # create dict
    output = {}

    # write out each set of routes to dict
    for route in routelist:
        dftemp = final[final["route_or_line"] == route]
        dictdata = (dftemp[["servicedate", "sum"]].rename(columns={
            "servicedate": "date",
            "sum": "riders"
        }).to_dict(orient="records"))
        output[route] = dictdata

    return output

Exemple #30

0

Afficher le fichier

Fichier : Finance.py Projet : amtrek/stock-event-repo

def validate ( date ):
  
    cal = USFederalHolidayCalendar() 
    holidays = cal.holidays(date) # check recent holidays

    adjusted = False

    if date in holidays:
        #create offset, where offset replace BDay as bussiness day offset to account for holiday
        offset = CustomBusinessDay(calendar=cal)
        date+=offset
        adjusted = True

    return date, adjusted

Exemple #31

0

Afficher le fichier

Fichier : test_holiday.py Projet : DusanMilunovic/pandas

    def test_calendar(self):

        calendar = USFederalHolidayCalendar()
        holidays = calendar.holidays(self.start_date, self.end_date)

        holidays_1 = calendar.holidays(
            self.start_date.strftime('%Y-%m-%d'),
            self.end_date.strftime('%Y-%m-%d'))
        holidays_2 = calendar.holidays(
            Timestamp(self.start_date),
            Timestamp(self.end_date))

        assert list(holidays.to_pydatetime()) == self.holiday_list
        assert list(holidays_1.to_pydatetime()) == self.holiday_list
        assert list(holidays_2.to_pydatetime()) == self.holiday_list

Exemple #32

0

Afficher le fichier

Fichier : __init__.py Projet : paulmattheww/utils

def generate_calendar(year, drop_index=False):
    '''
    Simple function to generate a calendar containing
    US holidays, weekdays and  holiday weeks.
    '''
    from pandas.tseries.offsets import YearEnd
    from pandas.tseries.holiday import USFederalHolidayCalendar

    start_date = pd.to_datetime('1/1/'+str(year))
    end_date = start_date + YearEnd()
    DAT = pd.date_range(str(start_date), str(end_date), freq='D')
    MO = [d.strftime('%B') for d in DAT]
    holidays = USFederalHolidayCalendar().holidays(start=start_date, end=end_date)

    cal_df = pd.DataFrame({'date':DAT, 'month':MO})
    cal_df['year'] = [format(d, '%Y') for d in DAT]
    cal_df['weekday'] = [format(d, '%A') for d in DAT]
    cal_df['is_weekday'] = cal_df.weekday.isin(['Monday','Tuesday','Wednesday','Thursday','Friday'])
    cal_df['is_weekday'] = cal_df['is_weekday'].astype(int)
    cal_df['is_holiday'] = cal_df['date'].isin(holidays)
    cal_df['is_holiday'] = cal_df['is_holiday'].astype(int)
    cal_df['is_holiday_week'] = cal_df.is_holiday.rolling(window=7,center=True,min_periods=1).sum()
    cal_df['is_holiday_week'] = cal_df['is_holiday_week'].astype(int)

    if not drop_index: cal_df.set_index('date', inplace=True)

    return cal_df

Exemple #33

0

Afficher le fichier

Fichier : forecast_one_to_one.py Projet : anisbasak/missing_data_imputation

def addExogenousVariables(df):
    holidays = USFederalHolidayCalendar().holidays(start=df.index.min(),
                                                   end=df.index.max())
    df['holiday'] = df.index.isin(holidays).astype(int)
    df['week_number'] = df.index.week
    df['day_of_week'] = df.index.dayofweek
    return df

Exemple #34

0

Afficher le fichier

def get_calendar():
    """Summary
    
    Returns:
        TYPE: Description
    """
    return CustomBusinessDay(calendar=USFederalHolidayCalendar())

Exemple #35

0

Afficher le fichier

Fichier : callbacks.py Projet : liangliang-yang/Capstone-Project-Stock-Predictor

def predict_future_price(df, lookback_days, forcast_days, model, sc):
    """
    Prediction of future stock price
    """

    # calculate forcast dates and indexs, used for plot
    B_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar()) # business day
    forcast_dates = [df['Date'].iloc[-1]+i*B_DAY for i in range(1,forcast_days+1)]
    forcast_indexs = [df['Date'].index[-1]+i for i in range(1,forcast_days+1)]

    # input X for forcast
    X_scaled = df[['Scaled_Close']][-lookback_days:].values
    X_scaled = X_scaled.reshape((1, lookback_days, 1))

    # prediction the scaled price with model
    forcast_price_scaled = model.predict(X_scaled)

    # transform back to the normal price
    forcast_prices = sc.inverse_transform(forcast_price_scaled)

    # create the forcast_dataframe
    forcast_dataframe = pd.DataFrame({
        'index' : forcast_indexs,
        'Date' : forcast_dates,
        'Adj Close' : forcast_prices[0],
        'Scaled_Close' : forcast_price_scaled[0]
    })

    forcast_dataframe = forcast_dataframe.set_index('index', drop=True)
    return forcast_dataframe

Exemple #36

0

Afficher le fichier

Fichier : epa_amd.py Projet : NREL/Solar-for-Industry-Process-Heat

        def describe_date(amd_data):
            """
            Add columns for weekday, month, and holiday. Based on existing
            timestamp column in amd_data dataframe.
            """

            holidays = USFederalHolidayCalendar().holidays()

            if type(amd_data) == dd.DataFrame:

                amd_data = amd_data.assign(weekday=amd_data.timestamp.map(
                    lambda x: x.weekday() > 4, meta=('weekday', 'bool')))

                amd_data = amd_data.assign(month=amd_data.timestamp.map(
                    lambda x: x.month, meta=('month', 'int')))

                amd_data = amd_data.assign(holiday=amd_data.timestamp.map(
                    lambda x: x.date() in holidays, meta=('holiday', 'bool')))

            if type(amd_data) == pd.DataFrame:

                amd_data['weekday'] = amd_data.timestamp.map(
                    lambda x: x.weekday() > 4)

                amd_data['month'] = amd_data.timestamp.map(lambda x: x.month)

                amd_data['holiday'] = amd_data.timestamp.map(
                    lambda x: x in holidays)

            return amd_data

Exemple #37

0

Afficher le fichier

def Converter_Monthly_Daily(Name_O, Indicator, start_date):
    usb = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    rng = pd.date_range(start=start_date, end="1/1/2018", freq=usb)
    Name_Daily = pd.DataFrame({'DATE': rng})
    Name_Daily['DATE'] = pd.to_datetime(Name_Daily.DATE)
    Name_Daily['Year'] = Name_Daily.DATE.dt.year
    Name_Daily['Month'] = Name_Daily.DATE.dt.month
    Name_Daily['Day'] = Name_Daily.DATE.dt.day
    #Name_Daily = Name_Daily.drop(columns=['DATE'])

    year = Name_Daily['Year'].unique()
    year = np.append(year, [2018])
    arr = []
    index = 0
    for y in year:
        newY = Name_Daily.loc[Name_Daily.Year == y]
        for m in range(1, 13):
            numberofDAYS = len(newY.loc[(newY.Month == m)])
            for day in range(0, numberofDAYS):
                arr.append(Name_O[Indicator][index] +
                           (Name_O[Indicator][index + 1] -
                            Name_O[Indicator][index]) * day / numberofDAYS)
            index = index + 1

    #Name_Daily = Name_Daily.drop(columns=['Year','Month','Day'])
    Name_Daily[Indicator] = arr
    #out_filename = "Economy_Indicators_Data"+ "\\" + Indicator + '_Daily_Insoo.csv'
    #Name_Daily.to_csv(out_filename,sep=',', encoding='utf-8')
    return Name_Daily

Exemple #38

0

Afficher le fichier

Fichier : test_calendar.py Projet : Itay4/pandas

def test_calendar(transform):
    start_date = datetime(2012, 1, 1)
    end_date = datetime(2012, 12, 31)

    calendar = USFederalHolidayCalendar()
    holidays = calendar.holidays(transform(start_date), transform(end_date))

    expected = [
        datetime(2012, 1, 2),
        datetime(2012, 1, 16),
        datetime(2012, 2, 20),
        datetime(2012, 5, 28),
        datetime(2012, 7, 4),
        datetime(2012, 9, 3),
        datetime(2012, 10, 8),
        datetime(2012, 11, 12),
        datetime(2012, 11, 22),
        datetime(2012, 12, 25)
    ]

    assert list(holidays.to_pydatetime()) == expected

Exemple #39

0

Afficher le fichier

Fichier : config.py Projet : schwallie2/alive_rescue

def get_first_bday_of_month(mnth=None, yr=None):
    '''
    Return the first business day of the current month if no variables provided
    Return the first business day of the month and year provided if variables provided

    Tests:
    In [188]: config.get_first_bday_of_month(12,2015)
    Out[188]: datetime.date(2015, 12, 1)

    In [189]: config.get_first_bday_of_month(11,2015)
    Out[189]: datetime.date(2015, 11, 2)

    In [190]: config.get_first_bday_of_month(10,2015)
    Out[190]: datetime.date(2015, 10, 1)

    In [191]: config.get_first_bday_of_month(1,2016)
    Out[191]: datetime.date(2016, 1, 4)

    In [192]: config.get_first_bday_of_month(8,2015)
    Out[192]: datetime.date(2015, 8, 3)
    :param mnth:
    :param yr:
    :return:
    '''
    from calendar import monthrange
    from pandas.tseries.holiday import USFederalHolidayCalendar
    from pandas.tseries.offsets import CustomBusinessDay
    if yr is None or mnth is None:
        yr = pd.datetime.now().year if pd.datetime.now().month != 1 else pd.datetime.now().year - 1
        mnth = pd.datetime.now().month - 1 if pd.datetime.now().month != 1 else 12
    else:
        yr = yr if mnth != 1 else yr - 1
        mnth = mnth - 1 if mnth != 1 else 12
    end_last = monthrange(yr, mnth)
    end_last = pd.Timestamp('%s/%s/%s' % (mnth, end_last[1], yr)).date()
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start=end_last - pd.tseries.offsets.Day(60),
                            end=end_last + pd.tseries.offsets.Day(60)).to_pydatetime()
    bday_cus = CustomBusinessDay(holidays=holidays)
    return (end_last + bday_cus).date()

Exemple #40

0

Afficher le fichier

Fichier : create_classifier.py Projet : gnatman/TapPy

    if beginning_range <= date_of_interest <= end_range:
        days_since_school_year_start = (date_of_interest - beginning_range).days
    #for 2015/2016 School year
    beginning_range = datetime.date(2015, 8, 31)
    end_range = datetime.date(2016, 8, 29)
    if beginning_range <= date_of_interest <= end_range:
        days_since_school_year_start = (date_of_interest - beginning_range).days
    datekey_list.append(weather_datekey)
    days_since_school_year_start_list.append(days_since_school_year_start)
days_since_school_year_start_db = {'days_since_school_year_start' : pd.Series(days_since_school_year_start_list, index=datekey_list)}
days_since_school_year_start_df = pd.DataFrame(days_since_school_year_start_db)



#Account for major US federal holidays
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start='2013-01-01', end='2016-12-31').to_pydatetime()
datekey_list = []
holidays_list = []
#for weather_datekey in weather_df.index.values:
for weather_datekey in weather_df.datekey:
    year = int(str(weather_datekey)[:4])
    month = int(str(weather_datekey)[4:6])
    day = int(str(weather_datekey)[6:])
    date_of_interest = datetime.date(year, month, day)
    if date_of_interest in holidays:
        holidays_list.append(1)
    else:
        holidays_list.append(0)
    datekey_list.append(weather_datekey)
holidays_db = {'holidays' : pd.Series(holidays_list, index=datekey_list)}

Exemple #41

0

Afficher le fichier

Fichier : matplotlib_funcs.py Projet : tesskbot/moveloot_public

def make_regression_features_oct(df):
    '''Create features for regression given a dataframe
    '''

    from pandas.tseries.holiday import USFederalHolidayCalendar

    x_dayssincestart = df.dayssincestart

    x_dayofweek = df.dayofweek
    x_dayofweek = pd.get_dummies(x_dayofweek,prefix='dayofweek')

    x_dayofmonth = df.dayofmonth
    x_dayofmonth = pd.get_dummies(x_dayofmonth, prefix='dayofmonth')

    x_week1 = pd.Series([1 if day < 8 else 0 for day in df.dayofmonth],name='week1')
    x_week2 = pd.Series([1 if (day >= 8 and day < 16) else 0 for day in df.dayofmonth],name='week2')
    x_week3 = pd.Series([1 if (day >= 16 and day < 23) else 0 for day in df.dayofmonth],name='week3')
    x_week4 = pd.Series([1 if day >= 23 else 0 for day in df.dayofmonth],name='week4')

    x_isweekend = pd.Series([1 if (day == 5 or day == 6) else 0 for day in df.dayofweek],name='isweekend')

    x_istueswed = pd.Series([1 if (day == 1 or day == 2) else 0 for day in df.dayofweek],name='istueswed')

    x_isfrisat = pd.Series([1 if (day == 4 or day == 5) else 0 for day in df.dayofweek],name='isfrisat')

    #get holidays
    calendar = USFederalHolidayCalendar()
    holidays = calendar.holidays(start=df.datetime.min(), end=df.datetime.max())
    x_isholiday = pd.Series([1 if day in holidays.tolist() else 0 for day in df.datetime],name='x_isholiday')

    #find 3-day weekends
    x_isholidaywknd = pd.Series(np.zeros(len(x_isholiday)),name='x_isholidaywknd')
    x_weekendandholiday = x_isweekend + x_isholiday
    for i in range(1, len(x_weekendandholiday)-2):
        if x_weekendandholiday[i] == 1:
            if x_weekendandholiday[i+1] == 1 and x_weekendandholiday[i+2] ==1:
                x_isholidaywknd[i] = 1
                x_isholidaywknd[i+1] = 1
                x_isholidaywknd[i+2] = 1

    x_week1tueswed = x_week1 & x_istueswed
    x_week2tueswed = x_week2 & x_istueswed
    x_week3tueswed = x_week3 & x_istueswed
    x_week4tueswed = x_week4 & x_istueswed

    x_week1frisat = x_week1 & x_isfrisat
    x_week2frisat = x_week2 & x_isfrisat
    x_week3frisat = x_week3 & x_isfrisat
    x_week4frisat = x_week4 & x_isfrisat

    x_week1wknd = x_week1 & x_isweekend
    x_week2wknd = x_week2 & x_isweekend
    x_week3wknd = x_week3 & x_isweekend
    x_week4wknd = x_week4 & x_isweekend

    X_vars = pd.concat([x_dayssincestart,x_dayofweek,x_dayofmonth,
                       x_week1,x_week2,x_week3,x_week4,
                       x_isweekend,x_istueswed,x_isfrisat,
                       x_isholiday,x_isholidaywknd,
                       x_week1tueswed,x_week2tueswed,x_week3tueswed,x_week4tueswed,
                       x_week1frisat,x_week2frisat,x_week3frisat,x_week4frisat,
                       x_week1wknd,x_week2wknd,x_week3wknd,x_week4wknd],
                       axis=1)

    return X_vars

Exemple #42

0

Afficher le fichier

Fichier : divvy_data_preprocessing.py Projet : mgiangreco/divvy

#add a dummy variable for weekend day
merged_master['weekend_day'] = np.where((merged_master['day_of_week']==5)|(merged_master['day_of_week']==6),1,0)

#drop day_of_week field
merged_master.drop('day_of_week', axis=1, inplace=True)

#add a column for month and create month dummies
merged_master['month'] = merged_master.index.month
dummy_df = pd.get_dummies(merged_master['month'], prefix='month')
merged_master = pd.concat([merged_master,dummy_df], axis=1)
merged_master.drop('month', axis=1, inplace=True)

#add a column for holiday
merged_master['date'] = merged_master.index.date
merged_master['date'] = pd.to_datetime(merged_master['date'])
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start=merged_master.date.min(), end=merged_master.date.max())
merged_master['holiday'] = merged_master['date'].isin(holidays).astype(int)
merged_master.drop('date', axis=1, inplace=True)

#add a column for days since first date in dataset
merged_master['days_since_first_date'] = (merged_master.index.date - (merged_master.index.date.min())).astype('timedelta64[D]').astype(int)

#Drop any rows with null values
merged_master.dropna(axis=0, inplace=True)

#rearrange column order
merged_master['station_id'] = merged_master['from_station_id']
merged_master.drop(['from_station_id'], axis=1, inplace=True)