def logit_train(it):
    """Fit a logistic regression on one training split and score it on the matching test split.

    Parameters
    ----------
    it : sequence
        Five items packed into a single argument,
        ``(i, df_train, df_test, predictors, event)``:

        * ``i`` selects the split: ``df_train[i]`` is used for fitting and
          ``df_test[i]`` for scoring.
        * ``predictors`` is an iterable of column names used as model inputs.
        * ``event`` is the name of the target column.

        NOTE(review): the single packed argument presumably exists so the
        function can be mapped over a worker pool -- confirm with callers.

    Returns
    -------
    list
        ``[max HSS, max PSS, threshold at max HSS, threshold at max PSS]``,
        where each threshold is the value returned by ``pss`` alongside the
        winning score.
    """
    i, df_train, df_test, predictors, event = it
    predictor_cols = list(predictors)

    logit = LogisticRegression(class_weight="balanced", solver="liblinear",
                               max_iter=1000)
    logit_mod = logit.fit(df_train[i][predictor_cols], df_train[i][event])

    # Column 1 of predict_proba is the probability of the second class.
    p = logit_mod.predict_proba(df_test[i][predictor_cols])[:, 1]

    # Score on a copy: writing the "logit" column straight into df_test[i]
    # would silently modify the caller's frame (and can raise pandas'
    # chained-assignment warnings when that frame is itself a slice).
    df_test_temp = df_test[i].assign(logit=p)

    # Calculate the HSS/PSS for a range of probabilistic thresholds and save
    # the maximums.
    hss_out = []
    pss_out = []
    thresh_hss_out = []
    thresh_pss_out = []
    for t in np.arange(0, 1.01, 0.01):
        hss_p, thresh_hss = pss([t, df_test_temp, "logit", event, "hss"])
        pss_p, thresh_pss = pss([t, df_test_temp, "logit", event, "pss"])
        hss_out.append(hss_p)
        pss_out.append(pss_p)
        thresh_hss_out.append(thresh_hss)
        thresh_pss_out.append(thresh_pss)

    return [
        np.max(hss_out),
        np.max(pss_out),
        thresh_hss_out[np.argmax(hss_out)],
        thresh_pss_out[np.argmax(pss_out)],
    ]
def _best_hss(df, column, event, thresholds):
    """Return ``(max HSS, threshold reported at that maximum)`` for ``df[column]``.

    ``pss`` is called once per value in ``thresholds`` in "hss" mode; each
    call yields a ``(score, threshold)`` pair.
    """
    results = [pss([t, df, column, event, "hss"]) for t in thresholds]
    scores = [score for score, _ in results]
    reported = [thresh for _, thresh in results]
    return np.max(scores), reported[np.argmax(scores)]


def _compare_logit_to_diagnostic(label, logit, df_aws, df_sta, preds, event, p):
    """Fit ``logit`` on ``df_aws`` and print its best HSS next to that of diagnostic ``p``.

    The model is fitted and evaluated on the same rows of ``df_aws``, and its
    probabilities are stored in ``df_aws["logit"]`` (overwriting any previous
    values in that column).
    """
    logit_mod = logit.fit(df_aws[preds], df_aws[event])
    df_aws["logit"] = logit_mod.predict_proba(df_aws[preds])[:, 1]
    hss_logit, hss_thresh_logit = _best_hss(
        df_aws, "logit", event, np.linspace(0, 1, 100))

    # NOTE(review): the candidate thresholds for the diagnostic span the
    # 50th-99.5th percentile of df_sta, while the scores are evaluated on
    # df_aws, for both event types -- confirm this is intended.
    lower = np.percentile(df_sta.loc[:, p], 50)
    upper = np.percentile(df_sta.loc[:, p], 99.5)
    hss_p, hss_thresh_p = _best_hss(
        df_aws, p, event, np.linspace(lower, upper, 100))

    print(label)
    print(p, "hss: ", hss_p, "thresh: ", hss_thresh_p)
    print("logit", "hss: ", hss_logit, "hss_thresh: ", hss_thresh_logit)


def run_logit():
    """Print the best HSS of a logistic regression versus a single diagnostic.

    For each of the BARRA and ERA5 point datasets, and for each of the
    ``is_conv_aws`` and ``is_sta`` events, a balanced logistic regression is
    fitted on a fixed predictor set and compared with one diagnostic
    (``t_totals`` or ``dcp``) by maximum HSS over 100 candidate thresholds.
    Results are printed; nothing is returned.
    """
    logit = LogisticRegression(class_weight="balanced", solver="liblinear")
    points_dir = "/g/data/eg3/ab4502/ExtremeWind/points/"

    # (file name, model_name passed to optimise_pss, experiments), where each
    # experiment is (printed label, predictors, event column, diagnostic).
    datasets = [
        (
            "barra_allvars_2005_2018_2.pkl",
            "barra_fc",
            [
                (
                    "BARRA Conv AWS",
                    ["lr36", "lr_freezing", "ml_el", "s06", "srhe_left",
                     "Umean06"],
                    "is_conv_aws",
                    "t_totals",
                ),
                (
                    "BARRA STA",
                    # Alternative predictor set left disabled in the original:
                    # ["lr36","lr_freezing","mhgt","ml_el","s06","srhe_left","Umean06"]
                    ["qmean06", "pwat", "qmean01", "sb_lcl", "ddraft_temp"],
                    "is_sta",
                    "dcp",
                ),
            ],
        ),
        (
            "era5_allvars_2005_2018.pkl",
            "era5",
            [
                (
                    "ERA5 Conv AWS",
                    ["lr36", "mhgt", "ml_el", "qmean01", "srhe_left",
                     "Umean06"],
                    "is_conv_aws",
                    "t_totals",
                ),
                (
                    "ERA5 STA",
                    ["lr36", "ml_cape", "srhe_left", "Umean06"],
                    "is_sta",
                    "dcp",
                ),
            ],
        ),
    ]

    for file_name, model_name, experiments in datasets:
        # The first returned value is not used here.
        _, df_aws, df_sta = optimise_pss(
            points_dir + file_name, T=1000, compute=False, l_thresh=2,
            is_pss="hss", model_name=model_name)
        for label, preds, event, p in experiments:
            _compare_logit_to_diagnostic(
                label, logit, df_aws, df_sta, preds, event, p)