# Example #1
0
import numpy as np
import matplotlib.pyplot as plt

# Run configuration. One location preset is active at a time; the other is
# kept commented out so it can be swapped in by hand.

# Alternative preset labelled "NY" (currently disabled):
# account_id = 1
# city_id = 5128581
# start_date = '2014-03-01'
# end_date = '2018-05-01'

# Active preset labelled "CT".
account_id = 2  # passed to get_posData
city_id = 4843564  # passed to get_weatherData further down the script
start_date = '2018-01-02'  # start of every date range used in this script
end_date = '2019-01-30'  # end of the initial request and holiday calendar

# Load the data for the active account over the configured date range.
# NOTE(review): get_posData is not defined in the visible part of the file;
# its result is presumably a pandas DataFrame (the script later reads
# `.guests` from it) -- confirm.
posData = get_posData(account_id, start_date, end_date)


def train_test_split(data, n_test):
    """Split an ordered container into a leading train part and a trailing test part.

    Args:
        data: Any object that supports slicing (list, NumPy array, pandas
            Series/DataFrame, ...).
        n_test: Number of trailing items to hold out as the test part.

    Returns:
        A ``(train, test)`` tuple: ``test`` holds the last ``n_test`` items
        of ``data`` and ``train`` holds everything before them. With
        ``n_test == 0`` the train part is all of ``data`` and the test part
        is empty.
    """
    if n_test == 0:
        # -0 == 0, so the generic slices below would return an empty train
        # part and put every item in the test part. Handle it explicitly.
        return data[:], data[:0]
    return data[:-n_test], data[-n_test:]


# Target variable: the `guests` attribute of the loaded data, wrapped in a
# DataFrame.
# NOTE(review): `pd` is not imported in the visible part of the file --
# confirm pandas is imported elsewhere.
target_variable = pd.DataFrame(posData.guests)

# days to forecast; also the number of trailing rows held out as the test set
n_test = 7

# Split target variable into training/test set: `test` is the last n_test
# rows and `train` is everything before them.
# NOTE(review): `test` is rebound further down the script to a slice of
# h_calendar, so this test set is only available until that point.
train, test = train_test_split(target_variable, n_test)
def holiday_check(start_date, end_date, holidays=None):
    """Build a daily calendar that flags which days are holidays.

    Args:
        start_date: First day of the calendar (anything ``pd.date_range``
            accepts, e.g. a ``'YYYY-MM-DD'`` string).
        end_date: Last day of the calendar, same accepted forms.
        holidays: Container supporting ``day in holidays`` for
            ``pd.Timestamp`` values. Defaults to the module-level
            ``us_holidays``.

    Returns:
        A DataFrame indexed by every day from ``start_date`` to ``end_date``
        with two columns: ``date`` (a copy of the index) and ``holiday``
        (1 when the day is in ``holidays``, otherwise 0).
    """
    if holidays is None:
        holidays = us_holidays

    days = pd.date_range(start_date, end_date, freq='D')
    h_calendar = pd.DataFrame(index=days)
    h_calendar['date'] = h_calendar.index

    # Compute every flag in one pass and assign the whole column at once.
    # Writing single cells through `h_calendar.holiday[i] = 1` is chained
    # assignment, which pandas does not guarantee to write back to the frame.
    flags = [int(day in holidays) for day in days]
    h_calendar['holiday'] = pd.Series(flags, index=days, dtype='int64')

    # Return the calendar: it is a local, so without this the result is lost.
    return h_calendar


# Build the holiday calendar for the configured date range.
# NOTE(review): the call's result is not captured here, yet the next line
# reads a module-level `h_calendar`. Nothing in the visible code binds that
# name at module scope -- confirm where it comes from; otherwise the next
# line raises NameError.
holiday_check(start_date, end_date)
# Calendar rows from start_date through dates[1]; this rebinds `test`.
# NOTE(review): `dates` is not defined in the visible part of the file.
test = h_calendar.loc[start_date:dates[1]]

for i in range(len(dates)):
    posData = get_posData(account_id, start_date, dates[i])
    weatherData = get_weatherData(city_id, start_date, dates[i])
    holiday = h_calendar.loc[start_date:dates[i]]
    weatherData['weekday'] = pd.DatetimeIndex(weatherData.index).dayofweek
    weatherData['weekend'] = (weatherData.weekday > 3).astype(float)
    weatherData[
        "weekend_diff"] = weatherData.weekend - weatherData.weekend.shift(
            7).fillna(0)
    exog_variables = [
        weatherData.temp_diff,
        weatherData.humidity_diff,
        #                      weatherData.weekend_diff,
        holiday.holiday
    ]
    target_variable = pd.DataFrame(posData.guests_log_diff)
    sarimax_d = sarimax_model(target_variable,