Ejemplo n.º 1
0
def logit_train(it):
    """Fit a logistic regression on one training split and score it on the
    matching test split.

    Parameters
    ----------
    it : sequence
        Five items, unpacked as ``(i, df_train, df_test, predictors, event)``.
        ``df_train[i]`` and ``df_test[i]`` are indexed by column label and are
        presumably pandas DataFrames (TODO confirm against the caller).
        ``predictors`` is an iterable of feature column names and ``event`` is
        the name of the target column.

    Returns
    -------
    list
        ``[max_hss, max_pss, thresh_at_max_hss, thresh_at_max_pss]``. The
        maxima are taken over the thresholds in ``np.arange(0, 1.01, 0.01)``
        and each threshold is the second value returned by ``pss`` at the
        position of the corresponding maximum.
    """
    i, df_train, df_test, predictors, event = it
    # Materialise the predictor names once; they are needed for both the
    # training and the test selection.
    feature_cols = list(predictors)

    logit = LogisticRegression(class_weight="balanced", solver="liblinear",
                               max_iter=1000)
    logit_mod = logit.fit(df_train[i][feature_cols], df_train[i][event])
    # Column 1 of predict_proba is the probability of the second class.
    p = logit_mod.predict_proba(df_test[i][feature_cols])[:, 1]

    # Score on a copy so that the caller's test frame is not modified (and so
    # pandas does not warn when df_test[i] is itself a slice of another frame).
    df_test_temp = df_test[i].copy()
    df_test_temp["logit"] = p

    # Calculate the HSS/PSS for a range of probabilistic thresholds and save
    # the maximums.
    hss_out = []
    pss_out = []
    thresh_hss_out = []
    thresh_pss_out = []
    for t in np.arange(0, 1.01, 0.01):
        hss_p, thresh_hss = pss([t, df_test_temp, "logit", event, "hss"])
        pss_p, thresh_pss = pss([t, df_test_temp, "logit", event, "pss"])
        hss_out.append(hss_p)
        pss_out.append(pss_p)
        thresh_hss_out.append(thresh_hss)
        thresh_pss_out.append(thresh_pss)

    best_hss_idx = np.argmax(hss_out)
    best_pss_idx = np.argmax(pss_out)
    return [np.max(hss_out), np.max(pss_out),
            thresh_hss_out[best_hss_idx], thresh_pss_out[best_pss_idx]]
Ejemplo n.º 2
0
def _best_hss(df, column, event, thresholds):
    """Scan ``thresholds`` on ``df[column]`` and return the best HSS.

    ``pss`` is called once per threshold with the ``"hss"`` option. The first
    item of each result is treated as the score and the second as the
    threshold. Returns ``(max_score, threshold_at_max_score)``.
    """
    results = [pss([t, df, column, event, "hss"]) for t in thresholds]
    scores = [score for score, _ in results]
    reported_thresholds = [thresh for _, thresh in results]
    return np.max(scores), reported_thresholds[np.argmax(scores)]


def _report_logit_vs_diagnostic(logit, df_aws, df_sta, preds, event, p, label):
    """Fit ``logit`` on ``df_aws`` and print its best HSS next to that of the
    single diagnostic ``p``.

    The fitted probabilities are written to ``df_aws["logit"]``, overwriting
    any previous values in that column.
    """
    logit_mod = logit.fit(df_aws[preds], df_aws[event])
    # Column 1 of predict_proba is the probability of the second class.
    df_aws["logit"] = logit_mod.predict_proba(df_aws[preds])[:, 1]
    hss_logit, hss_thresh_logit = _best_hss(
        df_aws, "logit", event, np.linspace(0, 1, 100))

    # Candidate thresholds for the diagnostic span its 50th to 99.5th
    # percentile as measured on df_sta, while the scores are evaluated on
    # df_aws.
    # NOTE(review): mixing the two frames looks deliberate - confirm.
    diagnostic_thresholds = np.linspace(
        np.percentile(df_sta.loc[:, p], 50),
        np.percentile(df_sta.loc[:, p], 99.5),
        100)
    hss_p, hss_thresh_p = _best_hss(df_aws, p, event, diagnostic_thresholds)

    print(label)
    print(p, "hss: ", hss_p, "thresh: ", hss_thresh_p)
    print("logit", "hss: ", hss_logit, "hss_thresh: ", hss_thresh_logit)


def run_logit():
    """Print the best HSS of a logistic regression and of a single reference
    diagnostic for four cases: BARRA and ERA5, each with the ``is_conv_aws``
    and ``is_sta`` events.

    The data for each dataset comes from ``optimise_pss``. Every case refits
    the same ``LogisticRegression`` instance on that dataset's ``df_aws``
    frame. Nothing is returned; results are written to stdout.
    """
    points_dir = "/g/data/eg3/ab4502/ExtremeWind/points/"
    logit = LogisticRegression(class_weight="balanced", solver="liblinear")

    # BARRA (the first value returned by optimise_pss is not used here)
    _, df_aws, df_sta = optimise_pss(
        points_dir + "barra_allvars_2005_2018_2.pkl", T=1000, compute=False,
        l_thresh=2, is_pss="hss", model_name="barra_fc")
    # Convective AWS
    _report_logit_vs_diagnostic(
        logit, df_aws, df_sta,
        preds=["lr36", "lr_freezing", "ml_el", "s06", "srhe_left", "Umean06"],
        event="is_conv_aws", p="t_totals", label="BARRA Conv AWS")
    # STA
    # Earlier predictor set, kept for reference:
    #   ["lr36","lr_freezing","mhgt","ml_el","s06","srhe_left","Umean06"]
    # NOTE(review): the STA model is fitted on df_aws, not df_sta - confirm
    # that this is intended.
    _report_logit_vs_diagnostic(
        logit, df_aws, df_sta,
        preds=['qmean06', 'pwat', 'qmean01', 'sb_lcl', 'ddraft_temp'],
        event="is_sta", p="dcp", label="BARRA STA")

    # ERA5
    _, df_aws, df_sta = optimise_pss(
        points_dir + "era5_allvars_2005_2018.pkl", T=1000, compute=False,
        l_thresh=2, is_pss="hss", model_name="era5")
    # Convective AWS
    _report_logit_vs_diagnostic(
        logit, df_aws, df_sta,
        preds=["lr36", "mhgt", "ml_el", "qmean01", "srhe_left", "Umean06"],
        event="is_conv_aws", p="t_totals", label="ERA5 Conv AWS")
    # STA
    _report_logit_vs_diagnostic(
        logit, df_aws, df_sta,
        preds=["lr36", "ml_cape", "srhe_left", "Umean06"],
        event="is_sta", p="dcp", label="ERA5 STA")