def rank_cities(state):
    """Integrate every per-city case series of ``state``.

    Builds the multi-city dataset with ``build_multicity_dataset(state)``,
    keeps only the columns whose name contains ``casos_<digits>`` and
    computes the trapezoidal integral of each retained column.

    Parameters
    ----------
    state
        Passed unchanged to ``build_multicity_dataset``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with one column per retained series (original
        column names), each holding that series' trapezoidal integral.
        No sorting is applied.
    """
    mult = build_multicity_dataset(state)

    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    case_column = re.compile(r'casos_\d+')
    cols = [col for col in mult.columns if case_column.search(col)]
    mult = mult[cols]
    print(mult.head())

    # NumPy 2.0 renamed ``trapz`` to ``trapezoid``; use whichever is present.
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    # TODO: label the result by city name using the (city, code) table in
    # '../../data/codigos_<state>.xlsx'. That table used to be loaded here
    # but was never used, so the read was dropped.

    # Build the frame in one go instead of inserting columns one at a time,
    # which fragments the frame when there are hundreds of cities.
    return pd.DataFrame({col: [integrate(mult[col])] for col in mult.columns})
def build_model(data, lag_order, window_type, window=12):
    """Build a ``DynamicVAR`` model over ``data``.

    Parameters
    ----------
    data
        Frame holding the series to model (the script below passes a pandas
        DataFrame). Its index must be convertible by ``pd.DatetimeIndex``.
        The caller's object is not modified.
    lag_order
        Forwarded to ``DynamicVAR`` as ``lag_order``.
    window_type
        Forwarded to ``DynamicVAR`` as ``window_type``.
    window : optional
        Forwarded to ``DynamicVAR`` as ``window``. Defaults to 12, the value
        that used to be hard-coded.

    Returns
    -------
    The constructed ``DynamicVAR`` instance.
    """
    # Convert the index on a copy so the caller's frame keeps its own index.
    data = data.copy()
    data.index = pd.DatetimeIndex(data.index)
    return DynamicVAR(data, lag_order=lag_order, window=window,
                      window_type=window_type)


if __name__ == "__main__":
    prediction_window = 6  # weeks
    # Set to 'local' to model a single city instead of several.
    scenario = 'global'
    if scenario == 'local':
        data = get_alerta_table(3303500)  # Nova Iguaçu: 3303500
        data = data[['casos', 'p_inc100k', 'nivel']]
    else:
        data = build_multicity_dataset('RJ')
        # Keep the first three raw case-count columns, skipping 'casos_est*'.
        case_columns = [col for col in data.columns
                        if col.startswith('casos')
                        and not col.startswith('casos_est')]
        data = data[case_columns[:3]]
    data = data.diff()
    # DataFrame.info() prints its summary itself and returns None, so
    # wrapping it in print() only added a stray "None" line.
    data.info()
    # TODO: Apply seasonal differencing to series
    model = build_model(data, 12, 'expanding')
    forecast = model.forecast(prediction_window)
    print(forecast)
    model.plot_forecast(prediction_window)
    plt.savefig('DVAR_forecast_{}_weeks.png'.format(prediction_window))
def test_multi_city_dataset(self):
    """The 'RJ' multi-city dataset must expose more than 500 columns."""
    dataset = build_multicity_dataset('RJ')
    column_count = len(dataset.columns)
    self.assertGreater(column_count, 500)