else: # get dates for previous days predictions/forecast DATES = DATES_O[0:j] obs_y_trunc = obs_y[0:j] #if len(obs_y_trunc) == 0 or max(obs_y_trunc) == 1: # continue # declare x as a list of integers from 0 to len(y) x = list(range(len(obs_y_trunc))) iterations = 50000 SEIR_fit = 0 obs_pred_r2, obs_x, pred_y, forecasted_x, forecasted_y, params = fxns.fit_curve(x, obs_y_trunc, model, ForecastDays, PopSize, ArrivalDate, j, iterations, SEIR_fit) print(obs_pred_r2) if obs_pred_r2 < 0: obs_pred_r2 = 0.0 # convert any y-values (observed, predicted, or forecasted) # that are less than 0 (nonsensical values) to 0. obs_y_trunc[obs_y_trunc < 0] = 0 pred_y = np.array(pred_y) pred_y[pred_y < 0] = 0 forecasted_y = np.array(forecasted_y) forecasted_y[forecasted_y < 0] = 0
def _get_fit(self, focal_loc, ForecastDays, StatePops, model, seir_fits_df):
    """Fit ``model`` to one location's series for today and each of the ten prior days.

    For every day offset ``j`` in ``-10 .. 0`` the observed series of
    ``focal_loc`` is truncated to end ``-j`` days ago, leading zeros are
    dropped, and ``fxns.fit_curve`` is called on what remains.  One row per
    fitted window is appended to ``self._model_fits_df``; the daily
    increments of the current (``j == 0``) forecast are appended to
    ``self.new_cases``.

    Windows that contain no non-zero observation (all zeros, or empty
    because the series is shorter than the offset) are skipped, since there
    is nothing to fit.

    Args:
        focal_loc: Value matched against the ``'Province/State'`` column of
            ``self._df`` and ``StatePops``, and against ``'focal_loc'`` in
            ``seir_fits_df``.
        ForecastDays: Number of days to forecast ahead; one is added
            internally for indexing purposes.
        StatePops: DataFrame with ``'Province/State'``, ``'PopSize'`` and
            ``'Date_of_first_reported_infection'`` columns.
        model: Model identifier forwarded unchanged to ``fxns.fit_curve``
            and stored in the output row.
        seir_fits_df: DataFrame with a ``'focal_loc'`` column; the rows for
            ``focal_loc`` are forwarded to ``fxns.fit_curve``.

    Returns:
        None.  Results are written to ``self._model_fits_df`` and
        ``self.new_cases``.

    Raises:
        IndexError: If ``focal_loc`` has no row in ``StatePops`` or in
            ``self._df``.
    """
    # Population size and first-reported-infection date for this location.
    loc_pops = StatePops[StatePops['Province/State'] == focal_loc]
    PopSize = loc_pops['PopSize'].tolist()[0]
    ArrivalDate = loc_pops['Date_of_first_reported_infection'].tolist()[0]

    SEIR_Fit = seir_fits_df[seir_fits_df['focal_loc'] == focal_loc]

    # add 1 to number of forecast days for indexing purposes
    ForecastDays = int(ForecastDays + 1)

    # filter main dataframe to include only the chosen location
    df_sub = self._df[self._df['Province/State'] == focal_loc]

    # Column labels; everything from position 4 onward is treated as a date.
    # NOTE(review): assumes the first four columns are non-date metadata —
    # confirm against the loader that builds self._df.
    col_labels = list(df_sub)

    # One colour per day offset, oldest forecast first.
    fore_clrs = [
        'purple', 'mediumorchid', 'plum', 'blue', 'deepskyblue',
        'darkturquoise', 'green', 'limegreen', 'gold', 'orange', 'red'
    ]
    pred_clrs = [
        '0.0', '0.1', '0.2', '0.25', '0.3', '0.35', '0.4', '0.5', '0.6',
        '0.7', '0.8'
    ]

    # Forwarded to fxns.fit_curve as its ``iterations`` argument.
    iterations = 2

    for j, pred_clr, fore_clr in zip(range(-10, 1), pred_clrs, fore_clrs):
        # j == 0 is today's forecast (no truncation); a negative j drops the
        # last -j days.  A slice end of 0 would be empty, hence None.
        stop = None if j == 0 else j
        window_dates = col_labels[4:stop]
        observed = df_sub.iloc[0, 4:stop].values

        # Skip leading zeros.  The bound keeps an all-zero or empty window
        # from running off the end of the array.
        start = 0
        n_obs = len(observed)
        while start < n_obs and observed[start] == 0:
            start += 1
        if start == n_obs:
            # Nothing non-zero to fit for this window.
            continue

        y = observed[start:]
        dates = window_dates[start:]

        # declare x as a list of integers from 0 to len(y)
        x = list(range(len(y)))

        # Use the chosen model to obtain the observed-vs-predicted r-square,
        # the predicted y-values, and the forecasted x and y values.
        obs_pred_r2, obs_x, pred_y, forecasted_x, forecasted_y, params = fxns.fit_curve(
            x, y, model, ForecastDays, PopSize, ArrivalDate, j, iterations,
            SEIR_Fit)

        # Work on a copy so clamping below never touches the source frame.
        y = np.array(y)

        # A model can perform so poorly that the observed vs predicted
        # r-square is negative (a nonsensical value); report it as 0.0.
        if obs_pred_r2 < 0:
            obs_pred_r2 = 0.0

        # Clamp any negative (nonsensical) observed, predicted or forecasted
        # y-values to 0.
        y[y < 0] = 0
        pred_y = np.array(pred_y)
        pred_y[pred_y < 0] = 0
        forecasted_y = np.array(forecasted_y)
        forecasted_y[forecasted_y < 0] = 0

        latest_date = pd.to_datetime(dates[-1])
        first_date = pd.to_datetime(dates[0])

        # get the date of the last day in the forecast window
        future_date = latest_date + datetime.timedelta(days=ForecastDays - 1)

        # all dates from the first non-zero observation to the end of the
        # forecast window
        fdates = pd.date_range(start=first_date, end=future_date)
        fdates = fdates.strftime('%m/%d')

        # plot label for the legend
        if j == 0:
            label = 'Current forecast'
        else:
            label = str(-j) + ' day old forecast'

        if j == 0:
            # Record daily new cases for the current forecast: the first
            # forecasted value as-is, then day-over-day increases, with
            # non-increases recorded as 0.
            for k in range(len(forecasted_y)):
                if k == 0:
                    self.new_cases.append(forecasted_y[k])
                    continue
                delta = forecasted_y[k] - forecasted_y[k - 1]
                if delta > 0:
                    self.new_cases.append(delta)
                else:
                    self.new_cases.append(0)

        # dates from the first non-zero observation to the last observed day
        dates = pd.date_range(start=first_date, end=latest_date)
        dates = dates.strftime('%m/%d')

        output_list = [
            y, pred_y, forecasted_y, dates, fdates, label, obs_pred_r2,
            model, focal_loc, PopSize, ArrivalDate, pred_clr, fore_clr
        ]
        self._model_fits_df.loc[len(self._model_fits_df)] = output_list