import matplotlib.pyplot as plt
import numpy as np
# ``pd`` is used below (pd.DataFrame) but no import for it is visible in this
# file, so it is imported explicitly here.
import pandas as pd

# Alternative configuration, kept commented out:
# # NY
# account_id = 1
# city_id = 5128581
# start_date = '2014-03-01'
# end_date = '2018-05-01'

# CT
account_id = 2
city_id = 4843564
start_date = '2018-01-02'
end_date = '2019-01-30'

# get_posData is defined outside this section; the result must expose a
# ``guests`` attribute/column (read below).
posData = get_posData(account_id, start_date, end_date)


def train_test_split(data, n_test):
    """Split *data* into a leading training part and a trailing test part.

    Args:
        data: Any object supporting slice indexing (list, Series,
            DataFrame, ...).
        n_test: Number of trailing items to place in the test part.

    Returns:
        A ``(train, test)`` tuple: ``test`` holds the last ``n_test`` items
        and ``train`` holds everything before them.
    """
    if n_test == 0:
        # -0 == 0, so data[:-0] would be empty and data[-0:] would be the
        # whole input, i.e. the two parts would come out swapped. Handle the
        # "no test items" case explicitly.
        return data[:], data[:0]
    return data[:-n_test], data[-n_test:]


# Target variable
target_variable = pd.DataFrame(posData.guests)

# days to forecast
n_test = 7

# Split target variable into training/test set
train, test = train_test_split(target_variable, n_test)
def holiday_check(start_date, end_date): h_calendar = pd.DataFrame( index=pd.date_range(start_date, end_date, freq='D')) h_calendar['date'] = h_calendar.index h_calendar['holiday'] = 0 for i in h_calendar.date: if h_calendar.date[i] in us_holidays: h_calendar.holiday[i] = 1 holiday_check(start_date, end_date) test = h_calendar.loc[start_date:dates[1]] for i in range(len(dates)): posData = get_posData(account_id, start_date, dates[i]) weatherData = get_weatherData(city_id, start_date, dates[i]) holiday = h_calendar.loc[start_date:dates[i]] weatherData['weekday'] = pd.DatetimeIndex(weatherData.index).dayofweek weatherData['weekend'] = (weatherData.weekday > 3).astype(float) weatherData[ "weekend_diff"] = weatherData.weekend - weatherData.weekend.shift( 7).fillna(0) exog_variables = [ weatherData.temp_diff, weatherData.humidity_diff, # weatherData.weekend_diff, holiday.holiday ] target_variable = pd.DataFrame(posData.guests_log_diff) sarimax_d = sarimax_model(target_variable,