def prepare_data_features_kr(df_feat_kr):
    '''
    Prepare the KR features DataFrame to be used with the model.

    Works on a copy of the input: forward-fills "age_pos", drops every row
    that still holds a NaN in one of the required feature columns, then
    checks that the remaining rows are consecutive calendar days.

    Parameters
    ----------
    df_feat_kr : DataFrame
        Features table. Its index must support ``strftime`` (the dates are
        read from the index, not from the "date" column) and it must hold
        the columns listed in ``required_cols`` below.

    Returns
    -------
    DataFrame
        The cleaned copy; the input is not modified.

    Raises
    ------
    AssertionError
        If two successive remaining rows are not exactly one day apart.
    IndexError
        If no row is left after dropping NaN values.
    '''
    df_out = df_feat_kr.copy()

    # correct age : carry the last known value forward.
    # Series.ffill() replaces fillna(method="pad"), deprecated in pandas 2.1.
    df_out["age_pos"] = df_out["age_pos"].ffill()

    # check by drop if nan
    required_cols = [
        "date", 'nb_cases', 'T_min', 'T_max', 'H_mean', 'W_speed',
        'pos', 'test', 'day_num', 'age_pos',
    ]
    df_out.dropna(inplace=True, subset=required_cols)

    # every remaining day must directly follow the previous one
    dates_index = df_out.index.strftime("%Y-%m-%d")
    date_old = add_days(dates_index[0], -1)
    for date_curr in dates_index:
        date_expected = add_days(date_old, 1)
        if date_curr != date_expected:
            print("ERROR : ", date_curr)
            # explicit raise: a bare `assert` is stripped under `python -O`
            raise AssertionError(
                "non-consecutive date {} (expected {})".format(
                    date_curr, date_expected))
        date_old = date_curr

    return df_out
def extrap_missing_kr(df_feat_kr_tmp, list_missing):
    '''
    Fill the features table for dates that are missing from the data.

    When ``list_missing`` is empty the input DataFrame is returned as is.
    Otherwise the table is reindexed on a DatetimeIndex built from
    ``generate_list_dates`` (start: day before the smallest "date", end:
    largest "date"), new rows being forward-filled. The "date" column is
    rewritten from the new index as "%Y-%m-%d" strings and an "extrap"
    column flags the rows whose date belongs to ``list_missing``.
    '''
    # nothing missing : nothing to extrapolate
    if len(list_missing) == 0:
        return df_feat_kr_tmp

    # calendar on which the table is re-aligned
    # NOTE(review): the -1 day shift presumably compensates for
    # generate_list_dates excluding its start date - confirm.
    first_day = add_days(min(df_feat_kr_tmp["date"]), -1)
    last_day = max(df_feat_kr_tmp["date"])
    full_index = pd.DatetimeIndex(generate_list_dates(first_day, last_day))

    # rows created by the reindex take the values of the previous known day
    df_filled = df_feat_kr_tmp.reindex(full_index, method='ffill')

    # rebuild the date column from the index
    index_as_str = pd.to_datetime(df_filled.index).strftime("%Y-%m-%d")
    df_filled["date"] = index_as_str.tolist()

    # flag the days that were requested as missing
    df_filled["extrap"] = df_filled["date"].apply(
        lambda day: day in list_missing)

    return df_filled
def check_update_df_feat_kr(date_now=None, force_update=False):
    '''
    Decide whether the KR features data (cases and ages) must be updated.

    Parameters
    ----------
    date_now : str, optional
        Reference day as "%Y-%m-%d". Defaults to the current local day.
    force_update : bool
        When True, request a full update from the first known dates
        without looking at the file on disk.

    Returns
    -------
    tuple
        ``(flag_update, flag_update_age, date_req_start,
        date_req_start_age, date_req_end)``. A start date is None when the
        matching flag is False; the end date is None when both flags are
        False. The forced path returns the same 5-item shape (it used to
        return only 4 items, which broke callers unpacking 5 values).
    '''
    if date_now is None:
        date_now = datetime.datetime.now().strftime("%Y-%m-%d")

    if force_update:
        # same 5-tuple layout as the regular path
        return True, True, DATE_FIRST_CASES_GOUV_KR, \
            DATE_FIRST_FEAT_OK_KR, date_now

    if os.path.isfile(PATH_DF_FEAT_KR):
        df_feat_kr = pd.read_csv(PATH_DF_FEAT_KR)

        # first date to download for cases
        date_first_extrap = get_first_day_extrap_kr(df_feat_kr)
        if date_first_extrap is not None:
            # take first date of extrap to try to update it
            date_req_start = date_first_extrap
        else:
            # normal mode : take last date +1
            date_req_start = add_days(df_feat_kr["date"].max(), 1)

        # first date to download for ages : earliest day after
        # DATE_FIRST_FEAT_OK_KR whose "daily_age" is missing
        mask_age_missing = df_feat_kr["daily_age"].isna() & \
            (df_feat_kr["date"] > DATE_FIRST_FEAT_OK_KR)
        date_req_start_age = df_feat_kr.loc[mask_age_missing, "date"].min()
        # min() of an empty selection is NaN; pd.isna is used because an
        # identity test (`is np.nan`) is not guaranteed to match every NaN
        if pd.isna(date_req_start_age):
            date_req_start_age = date_req_start
    else:
        # no file yet : start from the first known dates
        date_req_start = DATE_FIRST_CASES_GOUV_KR
        date_req_start_age = DATE_FIRST_FEAT_OK_KR

    # dates are "%Y-%m-%d" strings : string order is chronological order
    flag_update = not (date_req_start > date_now)
    flag_update_age = not (date_req_start_age > date_now)

    if not flag_update:
        date_req_start = None
    if not flag_update_age:
        date_req_start_age = None

    date_req_end = date_now if (flag_update or flag_update_age) else None

    print("Updating Data KR...")
    print("update cases : ", flag_update)
    print("update age : ", flag_update_age)
    print("date_req_start: ", date_req_start)
    print("date_req_start_age: ", date_req_start_age)
    print("date_req_end: ", date_req_end)

    return flag_update, flag_update_age, \
        date_req_start, date_req_start_age, date_req_end
def update_pred_pos(df_feat_fr, from_disk=False):
    '''
    Update prediction data positive cases France.

    Returns the predictions saved on disk when prediction is disabled
    (``settings.PREDICT`` false) or when ``from_disk`` is requested and the
    saved predictions are still ahead of the known data. Otherwise computes
    new predictions (remote endpoint when ``settings.MODEL_TFLITE`` is set,
    local Keras model if not), saves them to ``PATH_DF_PLOT_PRED`` and
    returns them.

    Parameters
    ----------
    df_feat_fr : DataFrame
        Features table; the "date" and "nb_cases" columns are read here.
    from_disk : bool
        Ask for the saved predictions. Ignored when the file does not exist
        or when its first predicted date is not after the last known date.

    Returns
    -------
    DataFrame
        Columns "date", "pos", "nb_cases" for freshly computed predictions;
        when read from disk, whatever the file holds, indexed by "date".
    '''
    def _load_saved_pred():
        # saved predictions, indexed by their "date" column
        df_saved = pd.read_csv(PATH_DF_PLOT_PRED)
        df_saved.index = df_saved["date"]
        return df_saved

    # check if last prediction is after last known date
    if os.path.isfile(PATH_DF_PLOT_PRED):
        df_plot_pred = _load_saved_pred()
        if df_plot_pred["date"].min() <= df_feat_fr["date"].max():
            from_disk = False
    else:
        # nothing saved yet : cannot serve from disk
        # (same guard as update_pred_pos_all_kr)
        from_disk = False

    # if no prediction or if from disk
    if (not settings.PREDICT) or from_disk:
        return _load_saved_pred()

    # prepare features
    dataset, data_std, data_mean = prepare_dataset(df_feat_fr)

    # predict next days
    if settings.MODEL_TFLITE:
        json_list_list_x = prepare_to_lambda_future(dataset)
        resp = requests.post(URL_PREDICT, json=json_list_list_x)
        print("status code : ", resp.status_code)
        if resp.status_code == 200:
            y_multi_pred = retrieve_from_lambda(resp)
        else:
            # remote prediction failed : fall back on saved predictions
            print("AWS Lamdba future pred ERROR!")
            return _load_saved_pred()
    else:
        # prepare data : very last days
        x_multi = np.array([dataset[-PAST_HISTORY:, :]])
        # load model
        multi_step_model = tf.keras.models.load_model(PATH_MDL_MULTI_STEP)
        y_multi_pred = multi_step_model.predict(x_multi)

    # convert in positive cases
    # NOTE(review): index 4 is presumably the "pos" feature in the
    # normalisation vectors - confirm against prepare_dataset.
    y_pos_pred = y_multi_pred * data_std[4] + data_mean[4]

    # pos pred next days from last day : date, pos, total (sum)
    str_date_pred_0 = df_feat_fr.date.max()
    str_date_pred_1 = add_days(str_date_pred_0, FUTURE_TARGET)
    list_dates_pred = generate_list_dates(str_date_pred_0, str_date_pred_1)

    # figure
    df_plot_pred = pd.DataFrame(index=list_dates_pred,
                                columns=["date"],
                                data=list_dates_pred)
    df_plot_pred["pos"] = y_pos_pred[0].astype(int)
    # total cases : last known total + cumulated predicted daily cases
    arr_nb_pred = df_plot_pred["pos"].cumsum().values
    df_plot_pred["nb_cases"] = df_feat_fr["nb_cases"].max() + arr_nb_pred

    # save for future pred
    df_plot_pred.to_csv(PATH_DF_PLOT_PRED, index=False)

    return df_plot_pred
def update_pos(df_feat_fr):
    '''
    Select the plot data of positive cases for France.

    Keeps the rows whose "date" is on or after the latest date minus
    NB_DAY_PLOT days and returns them as an independent copy.
    '''
    # oldest day to keep, counted back from the most recent date
    date_latest = df_feat_fr["date"].max()
    date_cutoff = add_days(date_latest, -NB_DAY_PLOT)

    is_recent = df_feat_fr["date"] >= date_cutoff
    return df_feat_fr[is_recent].copy()
def update_pred_pos_all_kr(df_feat_kr, from_disk=False):
    '''
    Update prediction data of positive cases for all plotted days (KR data).

    Returns the predictions saved on disk when prediction is disabled
    (``settings.PREDICT`` false) or when ``from_disk`` is requested and the
    saved predictions reach the last known date. Otherwise computes the
    predictions (remote endpoint when ``settings.MODEL_TFLITE`` is set,
    local Keras model if not), saves them to ``PATH_DF_PLOT_PRED_ALL_KR``
    and returns them.

    Parameters
    ----------
    df_feat_kr : DataFrame
        Features table; the "date" and "nb_cases" columns are read here.
    from_disk : bool
        Ask for the saved predictions. Ignored when the file does not exist
        or when its last date is before the last known date.

    Returns
    -------
    DataFrame
        Columns "date", "pos", "nb_cases" for freshly computed predictions;
        when read from disk, whatever the file holds, indexed by "date".

    Raises
    ------
    ValueError
        In local-model mode, if no input window can be built.
    '''
    def _load_saved_pred():
        # saved predictions, indexed by their "date" column
        df_saved = pd.read_csv(PATH_DF_PLOT_PRED_ALL_KR)
        df_saved.index = df_saved["date"]
        return df_saved

    # saved predictions are only usable if they reach the last known date
    if os.path.isfile(PATH_DF_PLOT_PRED_ALL_KR):
        df_plot_pred = _load_saved_pred()
        if df_plot_pred["date"].max() < df_feat_kr["date"].max():
            from_disk = False
    else:
        from_disk = False

    if (not settings.PREDICT) or from_disk:
        return _load_saved_pred()

    # prepare features
    dataset, data_std, data_mean = prepare_dataset_kr(df_feat_kr)

    # predict
    if settings.MODEL_TFLITE:
        json_list_list_x = prepare_to_lambda(dataset)
        resp = requests.post(URL_PREDICT_KR, json=json_list_list_x)
        print("status code : ", resp.status_code)
        if resp.status_code == 200:
            y_multi_pred = retrieve_from_lambda(resp)
        else:
            # remote prediction failed : fall back on saved predictions
            print("AWS Lamdba future pred ERROR!")
            return _load_saved_pred()
    else:
        # load model
        multi_step_model = tf.keras.models.load_model(PATH_MDL_MULTI_STEP_KR)

        # prepare data : one window of PAST_HISTORY rows per period,
        # walking the periods from NB_PERIOD_PLOT down to 1 and stopping
        # as soon as a window would start before the first row
        list_x = []
        for period in range(NB_PERIOD_PLOT, 0, -1):
            idx_end = period * FUTURE_TARGET
            idx_start = idx_end - PAST_HISTORY
            if idx_start < 0:
                break
            list_x.append(np.array([dataset[idx_start:idx_end, :]]))

        if not list_x:
            raise ValueError(
                "no input window available: NB_PERIOD_PLOT * FUTURE_TARGET "
                "must be at least PAST_HISTORY")

        # model prediction : predictions of each window side by side
        y_multi_pred = np.concatenate(
            [multi_step_model.predict(x_multi) for x_multi in list_x],
            axis=1)

    # convert in positive cases
    # NOTE(review): index 4 is presumably the "pos" feature in the
    # normalisation vectors - confirm against prepare_dataset_kr.
    y_pos_pred = y_multi_pred * data_std[4] + data_mean[4]

    # list of dates
    K_days = y_pos_pred.shape[1]
    print("K_days = ", K_days)
    print("y_pos_pred.shape = ", y_pos_pred.shape)
    str_date_pred_1 = df_feat_kr.date.max()
    str_date_pred_0 = add_days(str_date_pred_1, -1 * K_days)
    list_dates_pred = generate_list_dates(str_date_pred_0, str_date_pred_1)

    # create df output
    df_plot_pred_all = pd.DataFrame(index=list_dates_pred,
                                    columns=["date"],
                                    data=list_dates_pred)
    # daily
    df_plot_pred_all["pos"] = y_pos_pred[0].astype(int)

    # Total : cumulate sum, restarted for every block of FUTURE_TARGET
    # days from the known total at the day given by add_days below
    list_nb_cases = []
    nb_pred = df_plot_pred_all["pos"].shape[0]
    for offset in range(0, nb_pred, FUTURE_TARGET):
        str_date_nb_0 = add_days(str_date_pred_0, offset)
        # .iloc[0] : first matching row by position. Plain [0] is a label
        # lookup on an integer index and a deprecated positional fallback
        # on any other index.
        nb_0 = df_feat_kr.loc[
            df_feat_kr["date"] == str_date_nb_0, "nb_cases"].iloc[0]
        arr_nb = nb_0 + \
            df_plot_pred_all["pos"].iloc[
                offset:offset + FUTURE_TARGET].cumsum().values
        list_nb_cases.extend(arr_nb.tolist())
    df_plot_pred_all["nb_cases"] = list_nb_cases

    # save for future pred
    df_plot_pred_all.to_csv(PATH_DF_PLOT_PRED_ALL_KR, index=False)

    return df_plot_pred_all