def DayDifference(start_date, end_date, Province):
    """Return the signed business-day difference between two datetimes.

    The result counts the working days (Monday-Friday that are not Canadian
    holidays for ``Province``) between the two calendar dates, plus the
    hour/minute difference expressed as a fraction of a day. The day count is
    negative when ``start_date`` falls on a later date than ``end_date``.

    Args:
        start_date: Start ``datetime``.
        end_date: End ``datetime``.
        Province: Province code forwarded to ``holidays.Canada(prov=...)``.

    Returns:
        The difference in (fractional) business days.
    """
    d1 = start_date
    d2 = end_date
    # Fraction of a day contributed by the hour and minute difference.
    count = (d2.hour - d1.hour) / 24 + (d2.minute - d1.minute) / 1440

    if d1.date() == d2.date():
        return count

    # Always walk forward from the earlier date; ``step`` carries the sign.
    if d1.date() > d2.date():
        earlier, span, step = d2, (d1.date() - d2.date()).days, -1
    else:
        earlier, span, step = d1, (d2.date() - d1.date()).days, 1

    # Build the calendar once and honour the requested province in both
    # directions (the forward direction used to be hard-coded to 'QC').
    calendar = holidays.Canada(prov=Province)
    for offset in range(1, span + 1):
        day = earlier + td(days=offset)
        if day.isoweekday() not in (6, 7) and day not in calendar:
            count += step

    return count
Beispiel #2
0
 def test_eq_ne(self):
     """Check equality/inequality of calendars built via full names and aliases."""
     us_long = holidays.UnitedStates()
     us_short = holidays.US()
     us_long_2014 = holidays.UnitedStates(years=[2014])
     us_short_2014 = holidays.US(years=[2014])
     ca_long = holidays.Canada()
     ca_short = holidays.CA()
     ca_long_2014 = holidays.Canada(years=[2014])
     ca_short_2014 = holidays.CA(years=[2014])

     # Same country and same years must compare equal, whichever name built it.
     expected_equal = (
         (us_long, us_short),
         (us_long_2014, us_short_2014),
         (ca_long, ca_short),
         (ca_long_2014, ca_short_2014),
     )
     for left, right in expected_equal:
         self.assertEqual(left, right)

     # A different year set or a different country must compare unequal.
     expected_different = (
         (us_long, us_long_2014),
         (us_long, ca_long),
         (us_long_2014, ca_long_2014),
     )
     for left, right in expected_different:
         self.assertNotEqual(left, right)
Beispiel #3
0
def deal_with_holidays():
    """Label Canadian holidays in the module-level ``data`` object.

    Every date in the ``'2018':'2021'`` slice of the Canadian calendar has its
    ``week_index`` entry (row label formatted as ``YYYY-MM-DD``) overwritten.
    """
    canadian_calendar = holidays.Canada()
    for holiday_date in canadian_calendar['2018':'2021']:
        row_label = datetime.strftime(holiday_date, '%Y-%m-%d')
        data.loc[row_label, 'week_index'] = 'Weekened'
Beispiel #4
0
def dayDescription(city, dateStamp, static=False):
    """Describe ``dateStamp`` as a holiday name or as a plain "month day" string.

    Args:
        city: Key into ``stations.city``; the first two characters of the
            station label (upper-cased) are used as the province code.
        dateStamp: The date to describe.
        static: When ``True``, a holiday name is only returned if the same
            month/day in the previous year carries the same holiday name.

    Returns:
        The holiday name, or the plain date text when no (qualifying) holiday
        falls on ``dateStamp``.
    """
    relevantYears = (dateStamp.year, dateStamp.year - 1)
    province = stations.city[city].label[:2].upper()
    localHolidays = holidays.Canada(state=province,
                                    years=list(relevantYears),
                                    observed=False)

    # Extra fixed-date occasions added on top of the calendar's own entries.
    for year in relevantYears:
        extraDays = {
            datetime.date(year, 11, 11): 'Remembrance Day',
            datetime.date(year, 12, 24): 'Christmas Eve',
            datetime.date(year, 12, 31): 'New Year\'s Eve',
            datetime.date(year, 2, 14): 'Valentine\'s Day',
        }
        localHolidays.append(extraDays)

    holidayName = localHolidays.get(dateStamp, '')
    plainDate = '{} {}'.format(monthName(dateStamp.month), nth(dateStamp.day))
    if holidayName == '':
        return plainDate

    if static is True:
        sameDayLastYear = datetime.date(dateStamp.year - 1, dateStamp.month,
                                        dateStamp.day)
        # A holiday that moves between years is not reported in static mode.
        if localHolidays.get(sameDayLastYear, '') != holidayName:
            return plainDate

    return holidayName
Beispiel #5
0
def matchDate(date, dateFilter, between, holiday):
    """Decide whether ``date`` is selected by a holiday, range and/or pattern.

    Args:
        date: The date being tested.
        dateFilter: Iterable of ``fnmatch`` patterns matched against
            ``str(date)``.
        between: ``None`` or a pair ``((month, day), (month, day))`` giving an
            inclusive window within ``date``'s own year.
        holiday: ``None`` or a holiday name. When given, it is the only
            criterion: the result is whether the Ontario (non-observed)
            holiday on ``date`` has exactly that name.

    Returns:
        ``True`` when the date matches, ``False`` otherwise.
    """
    if holiday is not None:
        calendar = holidays.Canada(state='ON', years=date.year, observed=False)
        return calendar.get(date, '') == holiday

    if between is not None:
        betweenStart = dt.date(date.year, between[0][0], between[0][1])
        betweenEnd = dt.date(date.year, between[1][0], between[1][1])
        if not betweenStart <= date <= betweenEnd:
            return False

    dateText = str(date)
    return any(fnmatch.fnmatch(dateText, pattern) for pattern in dateFilter)
    def is_holiday(date):
        """
        Look up a date in the Canadian holiday calendar.
        :param date: Datetime object
        :return: Tuple ``(True, holiday name)`` when the date is a Canadian
                 holiday, ``(False, NOT_AVAILABLE)`` otherwise
        """
        calendar = holidays.Canada()  # TODO handle province specific holidays
        if date not in calendar:
            return False, NOT_AVAILABLE

        return True, calendar.get(date)
Beispiel #7
0
def _next_business_day(moment, calendar):
    """Push ``moment`` forward a day at a time until it is a working day."""
    while moment.isoweekday() in (6, 7) or moment in calendar:
        moment = moment + td(days=1)
    return moment


def return_date(start, prediction):
    """Add ``prediction`` business days to ``start`` (Quebec calendar).

    Args:
        start: Starting ``datetime``.
        prediction: Duration in days; may be fractional.

    Returns:
        The resulting ``datetime``, never on a weekend or Quebec holiday.
    """
    Province = 'QC'
    # One calendar instance serves every membership test below.
    calendar = holidays.Canada(prov=Province)

    # Less than one day: convert to (rounded) minutes and add them directly.
    if prediction < 1:
        minutes_left = round(prediction * 1440, 0)
        return _next_business_day(start + td(minutes=minutes_left), calendar)

    # Otherwise split the prediction into whole days and leftover minutes.
    pred_days = math.floor(prediction)
    minutes_left = (prediction - pred_days) * 1440

    # Each whole day advances to the next business day.
    prediction_date = start
    for _ in range(pred_days):
        prediction_date = _next_business_day(prediction_date + td(days=1),
                                             calendar)

    # The leftover minutes may spill into a non-working day; skip past it.
    return _next_business_day(prediction_date + td(minutes=minutes_left),
                              calendar)
Beispiel #8
0
def DayDifference(start_date, end_date, Province):
    """Compute signed business-day differences for paired datetimes.

    For each position ``j`` the result is the number of working days
    (Monday-Friday that are not Canadian holidays for ``Province``) between
    ``start_date[j]`` and ``end_date[j]``, plus the hour/minute difference as
    a fraction of a day. Entries whose start value equals the string ``'0'``
    yield ``'NA'``.

    Args:
        start_date: Indexable collection of start datetimes (or ``'0'``).
        end_date: Indexable collection of end datetimes, same positions.
        Province: Province code forwarded to ``holidays.Canada(prov=...)``.

    Returns:
        A list with one entry (number or ``'NA'``) per start value.
    """
    timediff = []
    # Built on first use: only pairs spanning several dates need the calendar.
    calendar = None

    # Indexed access is kept (rather than zip) so inputs are read exactly as
    # before, including label-indexed containers.
    for j in range(len(start_date)):
        d1 = start_date[j]
        if d1 == '0':
            timediff.append('NA')
            continue
        d2 = end_date[j]

        # Fraction of a day due to the hour and minute difference.
        count = (d2.hour - d1.hour) / 24 + (d2.minute - d1.minute) / 1440

        if d1.date() != d2.date():
            if calendar is None:
                calendar = holidays.Canada(prov=Province)

            # Walk forward from the earlier date; ``step`` carries the sign.
            if d1.date() > d2.date():
                earlier, span, step = d2, (d1.date() - d2.date()).days, -1
            else:
                earlier, span, step = d1, (d2.date() - d1.date()).days, 1

            # The requested province applies in both directions (the forward
            # direction used to be hard-coded to 'QC').
            for offset in range(1, span + 1):
                day = earlier + td(days=offset)
                if day.isoweekday() not in (6, 7) and day not in calendar:
                    count += step

        timediff.append(count)

    return timediff
def add_holiday(df):
    """Add US/Canada holiday flags for check-in/check-out and drop raw columns.

    Adds ``north_am_ci`` and ``north_am_co`` (1 when the corresponding
    ``srch_ci`` / ``srch_co`` value is a US or Canadian holiday, else 0) to
    ``df`` and returns a frame without the original date-related columns.

    Args:
        df: DataFrame containing at least the columns read and dropped below.

    Returns:
        A DataFrame with the two flag columns and without the dropped columns.
    """
    ca_holidays = holidays.Canada()
    us_holidays = holidays.UnitedStates()

    def _north_american_holiday(day):
        # Each calendar must be tested on its own: ``us or ca`` evaluates to
        # a single calendar object, so the other one was never consulted.
        return 1 if day in us_holidays or day in ca_holidays else 0

    df['north_am_ci'] = df['srch_ci'].apply(_north_american_holiday)
    df['north_am_co'] = df['srch_co'].apply(_north_american_holiday)

    # Remove the original columns in a single pass.
    return df.drop(
        ['date_time', 'week', 'year', 'srch_ci', 'srch_co', 'lag_date_time'],
        axis=1)
 def test_isbday(self):
     """Exercise ``isbday`` with explicit calendars and with the module default."""
     # No default calendar: only the weekend check and an explicit
     # ``holidays=`` argument can make a day a non-business day.
     self.assertFalse(isbday(date(2014, 1, 4)))
     self.assertFalse(isbday("2014-01-04"))
     self.assertTrue(isbday(date(2014, 1, 1)))
     self.assertTrue(isbday("2014-01-01"))
     self.assertFalse(isbday(date(2014, 1, 1), holidays=holidays.US()))
     self.assertTrue(isbday(datetime(2014, 1, 1, 16, 30)))
     self.assertTrue(isbday(datetime(2014, 1, 1, 17, 30)))
     self.assertFalse(isbday(datetime(2014, 1, 1, 16, 30),
                      holidays=holidays.US()))
     self.assertFalse(isbday(datetime(2014, 1, 1, 17, 30),
                      holidays=holidays.US()))

     # Switch the module-wide default calendar; restore it even when an
     # assertion fails so the global does not leak into other tests.
     bdateutil.HOLIDAYS = holidays.Canada()
     try:
         self.assertFalse(isbday(date(2014, 7, 1)))
         self.assertTrue(isbday(date(2014, 7, 4)))
         self.assertFalse(isbday(date(2014, 1, 1)))
         self.assertTrue(isbday(date(2014, 7, 1), holidays=holidays.US()))
         self.assertFalse(isbday(date(2014, 7, 4), holidays=holidays.US()))
     finally:
         bdateutil.HOLIDAYS = []
Beispiel #11
0
def add_holiday(df_weather):
    """Add a 0/1 ``IsHoliday`` column based on each site's country calendar.

    Rows are grouped by ``site_id`` into England, Ireland, Canada and United
    States; each row's ``timestamp`` is looked up in the matching calendar.

    Args:
        df_weather: DataFrame with ``site_id`` and ``timestamp`` columns;
            it is modified in place.

    Returns:
        The same DataFrame, with ``IsHoliday`` stored as ``np.uint8``.
    """
    en_holidays = holidays.England()
    ir_holidays = holidays.Ireland()
    ca_holidays = holidays.Canada()
    us_holidays = holidays.UnitedStates()

    site_filters = (
        ('site_id == 1 or site_id == 5', en_holidays),
        ('site_id == 12', ir_holidays),
        ('site_id == 7 or site_id == 11', ca_holidays),
        ('site_id == 0 or site_id == 2 or site_id == 3 or site_id == 4 or site_id == 6 or site_id == 8 or site_id == 9 or site_id == 10 or site_id == 13 or site_id == 14 or site_id == 15', us_holidays),
    )
    # Resolve every row selection before the new column is created.
    selections = [(df_weather.query(expression).index, calendar)
                  for expression, calendar in site_filters]

    df_weather['IsHoliday'] = 0
    for row_index, calendar in selections:
        # The lookup yields the holiday name, or 0 when the day is no holiday.
        df_weather.loc[row_index, 'IsHoliday'] = df_weather.loc[
            row_index, 'timestamp'].apply(
                lambda stamp: calendar.get(stamp, default=0))

    # Collapse holiday names into a plain 1 flag.
    df_weather.loc[df_weather['IsHoliday'] != 0, 'IsHoliday'] = 1
    df_weather['IsHoliday'] = df_weather['IsHoliday'].astype(np.uint8)

    return df_weather
Beispiel #12
0
import holidays
import datetime

# Combined United States + Canada holiday calendar used by the helpers below.
# NOTE(review): this rebinds the name ``holidays``, so the imported module is
# no longer reachable under that name after this line.
holidays = holidays.UnitedStates() +  holidays.Canada()

def sanitize_dates(date_string):
    """Move a ``YYYY-MM-DD`` date forward to the next business day.

    A date that falls on a weekend or on a day in the module-level
    ``holidays`` calendar is advanced one day at a time until it is neither.
    Dates that are already business days are returned unchanged.

    Args:
        date_string: Date formatted as ``%Y-%m-%d``.

    Returns:
        The (possibly shifted) date formatted as ``%Y-%m-%d``.
    """
    date = datetime.datetime.strptime(date_string, "%Y-%m-%d")
    one_day = datetime.timedelta(days=1)
    # Step day by day: a fixed +2 pushed Sundays to Tuesday, and a single +1
    # after a holiday could land on a weekend or on another holiday.
    while date.weekday() >= 5 or date in holidays:
        date += one_day

    return date.strftime("%Y-%m-%d")

def shift_date_string(date_string, num):
    """Return the ``%Y-%m-%d`` date string moved by ``num`` days."""
    layout = "%Y-%m-%d"
    parsed = datetime.datetime.strptime(date_string, layout)
    shifted = parsed + datetime.timedelta(days=num)
    return shifted.strftime(layout)

def date_to_timestamp(date_string):
    """Parse a ``%Y-%m-%d`` string and return its POSIX timestamp."""
    return datetime.datetime.strptime(date_string, "%Y-%m-%d").timestamp()
    toronto_weather.Name.unique())]

# Copy each weather row's station coordinates from station_toronto,
# dividing the stored values by 10000000.
toronto_weather['Latitude'] = toronto_weather.Name.apply(
    lambda station_name: station_toronto.loc[
        station_toronto.Name == station_name, 'Latitude'
    ].values[0] / 10000000)
toronto_weather['Longitude'] = toronto_weather.Name.apply(
    lambda station_name: station_toronto.loc[
        station_toronto.Name == station_name, 'Longitude'
    ].values[0] / 10000000)

# Index objects over the station names of each city.
ottawa_station_lookup = pd.Index(station_ottawa.Name)
toronto_station_lookup = pd.Index(station_toronto.Name)
calgary_station_lookup = pd.Index(station_calgary.Name)

# Canadian holiday calendar shared by the rest of the module.
Canadian_Holidays = holidays.Canada()


def read_data() -> 'tuple(pd.DataFrame)|3':
    '''Obtain all of the citys data'''
    ottawa, calgary, toronto = None, None, None
    print("Retrieveing data from collision datasets")
    if os.path.isfile('./datasets/tmp/ottawa.csv'):
        ottawa = pd.read_csv('./datasets/tmp/ottawa.csv')
    else:
        # obtain ottawa
        ottawa_files = os.listdir(collision_dir('Ottawa'))
        ottawa_file_names = list(
            map(lambda x: join_path(collision_dir('Ottawa'), x),
                filter(regex.match, ottawa_files)))
Beispiel #14
0
import os
import sys
import holidays
import pandas as pd
from datetime import date, timedelta

# Module-level holiday calendars for Canada and the United States.
can_holidays = holidays.Canada()
us_holidays = holidays.UnitedStates()

# Absolute directory of the running script; falls back to '.' when
# sys.argv[0] carries no directory part.
curr_dir = os.path.abspath(os.path.dirname(sys.argv[0]) or '.')
# Path of the date-dimension CSV, expressed relative to the script directory.
date_csv_path = os.path.join(curr_dir,
                             './../../data/dimensions/date_dimension.csv')


def get_season(date):
    """Map a date to 'spring', 'summer', 'fall' or 'winter'.

    The month and day are folded into a single ``MMDD`` integer:
    03-01..05-31 is spring, up to 08-31 is summer, 09-01..11-30 is fall, and
    everything after 11-30 or up to 02-29 is winter.

    Raises:
        IndexError: If the month/day combination falls in none of the ranges.
    """
    month_day = date.month * 100 + date.day

    if 301 <= month_day <= 531:
        return 'spring'
    if 531 < month_day < 901:
        return 'summer'
    if 901 <= month_day <= 1130:
        return 'fall'
    if month_day > 1130 or month_day <= 229:
        return 'winter'
    raise IndexError("Invalid date")
import os
from io import StringIO

import pandas as pd
import numpy as np
import streamlit as st
import altair as alt
import holidays  # For time-of-use categorization

from scraper import get_hydro_usage

# Ontario holiday calendar, consulted when categorizing time-of-use periods.
ONTARIO_HOLIDAYS = holidays.Canada(prov="ON")

# Credentials are read from the environment; a missing variable is reported
# and the KeyError is re-raised so the module fails at import time.
try:
    USERNAME = os.environ["TORONTOHYDRO_USERNAME"]
    PASSWORD = os.environ["TORONTOHYDRO_PASSWORD"]
except KeyError as e:
    # NOTE(review): print receives two separate parenthesised strings, so the
    # message is emitted as two arguments joined by print's separator.
    print((
        "Must specify username and password using the environment variables "),
          ("TORONTOHYDRO_USERNAME and TORONTOHYDRO_PASSWORD."))
    raise e


def get_time_of_use_period(dt: pd.Timestamp) -> str:
    """Return a string corresponding to the time-of-use category for the
    datetime."""
    # 7PM - 7AM, weekends, and holidays are always off-peak
    if (dt.weekday() >= 5 or not (7 <= dt.hour < 12 + 7)
            or dt in ONTARIO_HOLIDAYS):
        return "Off-peak"
    # Nov - Apr is winter, versus May-Oct for summer
Beispiel #16
0
def historic_load():
    """Extract, transform and load the collision, weather and hour data.

    For each dataset a previously saved "transformed" CSV is reused when it
    exists; otherwise the data is extracted, transformed on a thread pool and
    (for collisions and weather) written back to disk. The hour dimension is
    derived from the weather timestamps. Finally the events dimension and all
    datasets are handed to ``load_events`` / ``load``.
    """
    def transform(transform_function, data):
        """Map ``transform_function`` over ``data`` on 64 threads and
        concatenate the resulting frames."""
        print()
        pool = ThreadPool(64)
        try:
            results = pool.map(transform_function, data)
        finally:
            # Release the worker threads even when a transform raises.
            pool.close()
            pool.join()
        # Combine the dataframes in one step; DataFrame.append is no longer
        # available in pandas 2.x.
        return pandas.concat(results)

    location_data = []
    if os.path.isfile('data/collisions/ottawa/collision_data_transformed.csv'):
        print("Reading Transformed Collision Data...")
        collision_data = pandas.read_csv('./data/collisions/ottawa/collision_data_transformed.csv')
        print('Done Reading Transformed Collision Data!')
    else:
        # Extract data
        print('Extracting Collision Data...')
        collision_data = collision.extract_collision_data()
        # Transform the data with workers
        print('Transforming Collision Data...')
        print(len(collision_data))
        collision_data = transform(collision.transform_collision_data, collision_data)
        print('Transformed Collision Data!')
        # Save file for next time
        collision_data.to_csv('data/collisions/ottawa/collision_data_transformed.csv')

    # Location data is only ever read from disk; when neither file exists the
    # empty list above is passed on unchanged.
    if os.path.isfile('data/collisions/ottawa/location_dim_transformed.csv'):
        print('Reading Final Transformed Location Data...')
        location_data = pandas.read_csv('./data/collisions/ottawa/location_dim_transformed.csv')
        print('Done Reading Final Transformed Location Data!')
    elif os.path.isfile('data/collisions/ottawa/location_dim.csv'):
        print('Reading Transformed Location Data...')
        location_data = pandas.read_csv('./data/collisions/ottawa/location_dim.csv')
        print('Done Reading Transformed Location Data!')

    if os.path.isfile('data/weather/ontario/weather_data_transformed.csv'):
        print("Reading Transformed Weather Data...")
        weather_data = pandas.read_csv('data/weather/ontario/weather_data_transformed.csv', parse_dates=['Date_Time'],
                                       infer_datetime_format=True)
        print('Done Reading Transformed Weather Data!')
    else:
        print('Extracting Weather Data...')
        weather_data = weather.extract_weather_data(
            pandas.to_datetime(collision_data['Date'] + ' ' + collision_data['Time']))
        print('Transforming Weather Data...')
        weather_data = transform(weather.transform_weather_data, weather_data)
        print('Transformed Weather Data!')
        weather_data.to_csv('data/weather/ontario/weather_data_transformed.csv')

    if os.path.isfile('data/weather/ontario/hour_data_transformed.csv'):
        print("Reading Transformed Hour Data...")
        hour_data = pandas.read_csv('data/weather/ontario/hour_data_transformed.csv')  # FIXME dtypes
        print('Done Reading Transformed Hour Data!')
    else:
        print('Extracting Hour Data...')
        print('Transforming Hour Data...')
        hour_data = weather_data[['Date_Time']]
        import holidays
        # Keep the calendar under its own name instead of rebinding the
        # ``holidays`` module name.
        canada_holidays = holidays.Canada()

        def describe_hour(moment):
            """Expand one timestamp into the hour-dimension attributes."""
            day = moment.date()
            return pandas.Series({
                'time': moment.time(),
                'date': day,
                'day_of_week': moment.dayofweek,
                'month': moment.month,
                'year': moment.year,
                'weekend': moment.dayofweek >= 5,
                'holiday': day in canada_holidays,
                'holiday_name': canada_holidays.get(day),
            })

        hour_data = hour_data.merge(hour_data.Date_Time.apply(describe_hour),
                                    left_index=True,
                                    right_index=True).drop(columns=['Date_Time'])
        print('Transformed Hour Data!')
        # NOTE: unlike the other datasets, the hour data is not written back
        # to disk here (the original to_csv call was commented out).

    # Load data into database
    print('Loading Data...')
    print('Loading Events Dimension...')
    load_events(list(set(collision.events())))
    print('Loaded Events Dimension!')
    load(collision_data, weather_data, location_data, hour_data)
Beispiel #17
0
def add_holiday(x):
    """Left-join an hourly holiday indicator ``h0`` onto ``x``.

    For each of the 16 sites, every hour between 2015-12-31 and 2019-01-01 is
    flagged 1 when it falls on a holiday of the site's country (taken from
    ``locate[site]['country']``) and 0 otherwise.

    Args:
        x: DataFrame with ``site`` and ``timestamp`` columns.

    Returns:
        ``x`` merged with the ``h0`` column on ``site`` and ``timestamp``.
    """
    time_range = pd.date_range(start='2015-12-31', end='2019-01-01', freq='h')
    country_holidays = {'UK': holidays.UK(), 'US': holidays.US(), 'IRL': holidays.Ireland(), 'CAN': holidays.Canada()}

    site_frames = []
    for site in range(16):
        calendar = country_holidays[locate[site]['country']]
        site_frame = pd.DataFrame({'site': site, 'timestamp': time_range})
        site_frame['h0'] = site_frame['timestamp'].apply(
            lambda stamp: stamp in calendar).astype(int)
        site_frames.append(site_frame)
    # Concatenate once instead of growing a frame inside the loop.
    holiday_mapping = pd.concat(site_frames, axis=0)

    # pd.merge takes the two frames as separate arguments; passing them as a
    # single list raises before any merging happens.
    return pd.merge(x, holiday_mapping, on=['site', 'timestamp'], how='left')
import holidays

# Print the Ontario Victoria Day date for each year from 2016 down to 1881.
for year in range(2016, 1880, -1):
    for date, name in holidays.Canada(state='ON', years=year).items():
        if name != 'Victoria Day':
            continue
        print(date, name)

# Print every Ontario holiday of 2016 with observed=False.
for date, name in holidays.Canada(
        state='ON',
        years=2016,
        observed=False,
).items():
    print(date, name)
Beispiel #19
0
    weather_df = weather_df.drop(['datetime', 'day', 'week', 'month'], axis=1)

    return weather_df


# fill_weather_dataset is called with string timestamps; they are converted
# back to datetimes afterwards.
weather['timestamp'] = weather['timestamp'].astype(str)
weather = fill_weather_dataset(weather)
weather['timestamp'] = pd.to_datetime(weather['timestamp'])

# holiday information

import holidays

# One calendar per country represented among the sites.
en_holidays = holidays.England()
ir_holidays = holidays.Ireland()
ca_holidays = holidays.Canada()
us_holidays = holidays.UnitedStates()

# Row labels of the weather frame, grouped by the country of each site_id.
en_idx = weather.query('site_id == 1 or site_id == 5').index
ir_idx = weather.query('site_id == 12').index
ca_idx = weather.query('site_id == 7 or site_id == 11').index
us_idx = weather.query(
    'site_id == 0 or site_id == 2 or site_id == 3 or site_id == 4 or site_id == 6 or site_id == 8 or site_id == 9 or site_id == 10 or site_id == 13 or site_id == 14 or site_id == 15'
).index

# Start from "no holiday"; each lookup stores the holiday name, or 0 when
# the timestamp is not a holiday in that country's calendar.
weather['IsHoliday'] = 0
weather.loc[en_idx, 'IsHoliday'] = weather.loc[en_idx, 'timestamp'].apply(
    lambda x: en_holidays.get(x, default=0))
weather.loc[ir_idx, 'IsHoliday'] = weather.loc[ir_idx, 'timestamp'].apply(
    lambda x: ir_holidays.get(x, default=0))
weather.loc[ca_idx, 'IsHoliday'] = weather.loc[ca_idx, 'timestamp'].apply(
Beispiel #20
0
 def __init__(self):
     """Set up the ``[index, day count]`` table and the Canadian calendar."""
     # presumably days per month for a non-leap year, indexed from 0 — verify
     # against the code that reads ``self.m``
     day_counts = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
     self.m = [[position, days] for position, days in enumerate(day_counts)]
     self.cdn_holidays = holidays.Canada()
Beispiel #21
0
def ProcessWeatherForMLInputs(PastAndForecastedWeatherPerCity):
    """Build per-zone feature frames from per-city weather frames.

    Each zone in ``zone_to_city`` maps to one or two cities. A single city's
    frame is used directly (its columns are renamed in place); two cities are
    merged on ``date`` and averaged column by column. Calendar, holiday
    (Alberta, 2016-2034) and polynomial/interaction features are then added
    and rows containing NaN are dropped.

    Args:
        PastAndForecastedWeatherPerCity: Mapping of city name to a weather
            DataFrame with six columns, renamed below to ``date``,
            ``min_temp``, ``max_temp``, ``avg_temp``, ``avg_hourly_temp`` and
            ``avg_wind_speed``.

    Returns:
        dict mapping each zone key to its feature DataFrame.
    """
    measure_columns = [
        'min_temp', 'max_temp', 'avg_temp', 'avg_hourly_temp',
        'avg_wind_speed'
    ]
    ca_holidays = holidays.Canada(state='AB', years=list(range(2016, 2035)))
    PastAndForecastedWeatherPerZone = {}

    for key, city_list in zone_to_city.items():
        if len(city_list) == 1:
            temp_df = PastAndForecastedWeatherPerCity[city_list[0]]
            temp_df.columns = ['date'] + measure_columns
        else:
            city1, city2 = city_list
            merge_df = pd.merge(PastAndForecastedWeatherPerCity[city1],
                                PastAndForecastedWeatherPerCity[city2],
                                on='date')
            merge_df.columns = (['date'] +
                                [name + '1' for name in measure_columns] +
                                [name + '2' for name in measure_columns])
            temp_df = pd.DataFrame()
            temp_df['date'] = merge_df['date']
            # Element-wise mean of the two cities' readings.
            for colname in measure_columns:
                first = merge_df[colname + '1'].values
                second = merge_df[colname + '2'].values
                temp_df[colname] = [
                    np.mean([a, b]) for a, b in zip(first, second)
                ]
        final_df = temp_df

        # max(0, 18 - average temperature) per row.
        final_df = final_df.assign(**{
            'HDD New': [max(0, 18 - t) for t in final_df['avg_temp'].values]
        })

        dates = final_df['date']
        final_df = final_df.assign(**{
            'Dayofweek': [d.dayofweek for d in dates],
            'Monthofyear': [d.month for d in dates],
        })

        weekday = final_df['Dayofweek']
        final_df = final_df.assign(**{
            '2D_Dayofweek': [np.sin(2 * np.pi * (d) / 7) for d in weekday],
            '2D_Monthofyear':
            [np.sin(2 * np.pi * m / 12) for m in final_df['Monthofyear']],
            'Weekend': weekday.isin([5, 6]).astype(float),
            'temp*wind': final_df['avg_temp'] * final_df['avg_wind_speed'],
        })

        # The remaining features only depend on columns that already exist,
        # so they are added together (same column order as before).
        avg_temp = final_df['avg_temp']
        avg_wind = final_df['avg_wind_speed']
        final_df = final_df.assign(**{
            'holiday': [1 if d in ca_holidays else 0 for d in final_df['date']],
            'temp_squared': avg_temp**2,
            'temp_cubic': avg_temp**3,
            'temp_diff': avg_temp.diff(),
            'wind_squared': avg_wind**2,
            'wind_cubic': avg_wind**3,
        })

        # diff() leaves a NaN in the first row; drop it with any other gaps.
        final_df = final_df.dropna()
        PastAndForecastedWeatherPerZone[key] = final_df

    return PastAndForecastedWeatherPerZone
Beispiel #22
0
def stat_holidays(province='BC', year=2015):
    """Return the holiday dates (calendar keys) for a province and year."""
    calendar = holidays.Canada(state=province, years=year)
    return calendar.keys()
	def fit(self, X, y=None):
		"""Create the per-country holiday calendars; ``X`` and ``y`` are unused.

		Returns ``self``.
		"""
		self.USh, self.CAh = holidays.UnitedStates(), holidays.Canada()
		self.UKh, self.IEh = holidays.UnitedKingdom(), holidays.Ireland()
		return self
Beispiel #24
0
def weather_feature_engineering(df):
    """Add humidity, lag, rolling-window and holiday features to ``df``.

    Which features are produced is controlled by the settings returned from
    ``us.get_feature_settings()``.

    Args:
        df: Weather DataFrame with ``site_id``, ``timestamp``,
            ``air_temperature``, ``dew_temperature`` and the configured
            feature columns. It is modified in place.

    Returns:
        The same DataFrame with the additional feature columns.
    """
    fs = us.get_feature_settings()

    # Humidity

    if fs['do_humidity']:

        saturated_vapor_pressure = 6.11 * (10.0
                                           **(7.5 * df['air_temperature'] /
                                              (237.3 + df['air_temperature'])))
        actual_vapor_pressure = 6.11 * (10.0
                                        **(7.5 * df['dew_temperature'] /
                                           (237.3 + df['dew_temperature'])))
        df['humidity'] = (actual_vapor_pressure /
                          saturated_vapor_pressure) * 100
        # ``np.float`` was only an alias of the builtin ``float`` and no
        # longer exists in current NumPy releases.
        df['humidity'] = df['humidity'].astype(float)

    feature_cols = fs['weather_lag_vars']
    lag_values = fs['weather_lag_values']

    # lags (shifted within each site so values never cross sites)

    for site_id in range(c.SITE_ID_RANGE):

        mask = df['site_id'] == site_id

        for feature in feature_cols:
            for shift in lag_values:
                lag_column = feature + '_lag_' + str(shift)
                df.loc[mask, lag_column] = df.loc[mask, feature].shift(shift)

    # window_average

    feature_cols = fs['weather_average_vars']
    window = fs['weather_average_window']

    df_site = df.groupby('site_id')

    df_rolled = df_site[feature_cols].rolling(window=window, min_periods=0)
    df_mean = df_rolled.mean().reset_index().astype(np.float16)
    df_std = df_rolled.std().reset_index().astype(np.float16)

    # NOTE(review): the assignment below aligns on index labels; it presumably
    # relies on df being sorted by site_id with a default index — confirm.
    for feature in feature_cols:
        df[f'{feature}_mean_window_{window}'] = df_mean[feature]
        df[f'{feature}_std_window_{window}'] = df_std[feature]

    # holidays

    if fs['do_holidays']:

        en_holidays = holidays.England()
        ir_holidays = holidays.Ireland()
        ca_holidays = holidays.Canada()
        us_holidays = holidays.UnitedStates()

        en_sites = c.SITE_COUNTRIES.get('England')
        ir_sites = c.SITE_COUNTRIES.get('Ireland')
        ca_sites = c.SITE_COUNTRIES.get('Canada')
        us_sites = c.SITE_COUNTRIES.get('United_States')

        # The query strings reference the local *_sites variables via '@'.
        en_idx = df.query('site_id in @en_sites').index
        ir_idx = df.query('site_id in @ir_sites').index
        ca_idx = df.query('site_id in @ca_sites').index
        us_idx = df.query('site_id in @us_sites').index

        df['is_holiday'] = 0
        selections = ((en_idx, en_holidays), (ir_idx, ir_holidays),
                      (ca_idx, ca_holidays), (us_idx, us_holidays))
        for row_index, calendar in selections:
            # The lookup yields the holiday name, or 0 when it is no holiday.
            df.loc[row_index, 'is_holiday'] = df.loc[
                row_index, 'timestamp'].apply(
                    lambda stamp: calendar.get(stamp, default=0))

        # Collapse holiday names into a plain 1 flag.
        holiday_idx = df['is_holiday'] != 0
        df.loc[holiday_idx, 'is_holiday'] = 1
        df['is_holiday'] = df['is_holiday'].astype(np.uint8)

    return df
# Load the training array stored under 'arr_0' in the archive.
maps = np.load("demand_smaller.npz")
training_data = maps['arr_0']
maps.close()

# do_2019 selects which array ``data`` refers to and the matching start time:
# the training array from 2018-01-01, or "test_heatmaps.npz" from 2019-01-01.
do_2019 = True
if not do_2019:
    #maps = np.load("demand.npz")
    data = training_data
    start_time = datetime.datetime(2018, 1, 1)
else:
    maps = np.load("test_heatmaps.npz")
    data = maps['arr_0']
    maps.close()
    start_time = datetime.datetime(2019, 1, 1)

# British Columbia holiday calendar.
ca_holidays = holidays.Canada(prov='BC')

SEQ_LEN = 6

# File names for the time and weather feature archives of the chosen year.
if do_2019:
    timename = "lstm_time_2019.npz"
    weathername = "lstm_weather_2019.npz"
else:
    timename = "lstm_time.npz"
    weathername = "lstm_weather.npz"

# When enabled, the feature containers below are initialised for computation.
precompute = False
if precompute:
    times_of_interest = []
    time_features = []
    # One (30, 50, 5) block per entry along the first axis of ``data``.
    weather_features = np.zeros((data.shape[0], 30, 50, 5))
#aggravated-assault         Offence Against a Person
#all-other-crimes           Other Theft
#theft-from-motor-vehicle   Theft from Vehicle
#all-other-crimes           Theft of Bicycle
#auto-theft                 Theft of Vehicle
#traffic-accident           Vehicle Collision or Pedestrian Struck (with Fatality)
#traffic-accident           Vehicle Collision or Pedestrian Struck (with Injury)

# Day names, Monday first.
week_Days = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
    'Sunday'
]

# Holiday calendars for British Columbia and Colorado over ``crimeYears``.
bc_holidays = holidays.Canada(years=crimeYears, state="BC")
co_holidays = holidays.UnitedStates(years=crimeYears, state="CO")

# Lists that will hold the fields read from the data CSV.
# NOTE(review): ``type`` and ``min`` shadow the Python builtins of the same
# name for the rest of this module.
type = []
year = []
month = []
day = []
hour = []
min = []
hundBlock = []
neighbourhood = []
x = []
y = []