def get_all_eval_measures(res, endog, include_prc=False):
    """Collect evaluation measures for a fitted model into one dict.

    res: fitted results object; must provide ``predict()`` and the
        attributes ``llf``, ``aic``, ``bic``, ``prsquared`` and ``df_model``.
    endog: observed responses, passed together with the predictions to the
        ``ems`` helpers.
    include_prc: when true, also store ``ems.prc(predict, endog,
        float_precision=3)`` under the "prc" key (Default: False)

    Returns a dict keyed by measure name ("precision", "recall", "accuracy",
    "f_score", "rmse", "mae", "auc", "llf", "aic", "bic", "prsquared",
    "df_model", "tn", "fn", "fp", "tp" and optionally "prc").
    """
    predict = res.predict()
    pred_table = ems.cm(predict, endog)
    measures = {
        "precision": ems.precision(pred_table),
        "recall": ems.recall(pred_table),
        "accuracy": ems.accuracy(pred_table),
        "f_score": ems.fscore_measure(pred_table),
        "rmse": ems.rmse(predict, endog),
        "mae": ems.mae(predict, endog),
        "auc": ems.auc(predict, endog),
        # Fit statistics are copied straight from the results object.
        "llf": res.llf,
        "aic": res.aic,
        "bic": res.bic,
        "prsquared": res.prsquared,
        "df_model": res.df_model,
    }
    # WRT to 1 as positive label.
    # NOTE(review): assumes ems.cm returns exactly four cells that flatten in
    # the order tn, fp, fn, tp -- confirm against ems.cm.
    tn, fp, fn, tp = (float(cell) for cell in pred_table.flatten())
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp
    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("In eval measures function.")
    if include_prc:
        # Include the precision recall values
        measures["prc"] = ems.prc(predict, endog, float_precision=3)
    return measures
Example #2
0
def get_all_eval_measures(predict, endog, include_prc=False):
    """Collect evaluation measures for a set of predictions into one dict.

    predict: predicted values, passed to the ``ems`` helpers.
    endog: observed responses, passed alongside ``predict``.
    include_prc: when true, also store ``ems.prc(...)`` under "prc" and
        ``ems.roc(...)`` under "roc", both with ``float_precision=3``
        (Default: False)

    Returns a dict keyed by measure name ("precision", "recall", "accuracy",
    "f_score", "rmse", "mae", "auc", "tn", "fn", "fp", "tp", "tpr", "fpr" and
    optionally "prc" and "roc"). "tpr" / "fpr" are NaN when their denominator
    (tp + fn, respectively fp + tn) is zero.
    """
    pred_table = ems.cm(predict, endog)
    measures = {
        "precision": ems.precision(pred_table),
        "recall": ems.recall(pred_table),
        "accuracy": ems.accuracy(pred_table),
        "f_score": ems.fscore_measure(pred_table),
        "rmse": ems.rmse(predict, endog),
        "mae": ems.mae(predict, endog),
        "auc": ems.auc(predict, endog),
    }
    # WRT to 1 as positive label.
    # NOTE(review): assumes ems.cm returns exactly four cells that flatten in
    # the order tn, fp, fn, tp -- confirm against ems.cm.
    tn, fp, fn, tp = (float(cell) for cell in pred_table.flatten())
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp

    # The counts are Python floats, so dividing by a zero total would raise
    # ZeroDivisionError when one class is absent; report the rate as
    # undefined (NaN) instead of crashing the whole evaluation.
    undefined = float("nan")
    positives = tp + fn
    negatives = fp + tn
    measures["tpr"] = tp / positives if positives else undefined
    measures["fpr"] = fp / negatives if negatives else undefined

    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("In eval measures function.")
    if include_prc:
        print("Generating PRC AND ROC")
        # Include the precision recall values
        measures["prc"] = ems.prc(predict, endog, float_precision=3)
        measures["roc"] = ems.roc(predict, endog, float_precision=3)
    return measures
Example #3
0
 def func3(x):
     """Return ``metrics.rmse`` of ``x`` (as an array) against the second
     positional column of ``Obs[oname2]``."""
     candidate = np.asarray(x)
     reference = Obs[oname2].iloc[:, 1]
     return metrics.rmse(candidate, reference)
Example #4
0
                        #    return sp.integrate.quad(interp1d(d,y,kind='cubic'),d1,d2)/(d2-d1)
                        # return UnivariateSpline(d,y,s=0).integral(d1,d2)/(d2-d1)
                        return splint(d1, d2,
                                      splrep(d, y, k=min(len(d) - 1, 3),
                                             s=0)) / (d2 - d1)

                    # return sp.integrate.quad(interp1d.splrep(d,y,k=len(d)-1,s=0),d1,d2)[0]/(d2-d1)

                    obs = Obs[oname2].iloc[:, 1:].apply(func, axis=1)
                    #print len(obs)

                    KGE.loc[js2[j], oname] = metrics.kling_gupta(sim,
                                                                 obs,
                                                                 method='2012')
                    MAE.loc[js2[j], oname] = metrics.meanabs(sim, obs)
                    RMSE.loc[js2[j], oname] = metrics.rmse(sim, obs)
                    corr.loc[js2[j], oname] = metrics.corr(sim, obs)

            if oname == outnames[0]:
                itot += nj

        # Clean the metrics dataframes to keep only the successful runs common to all obs.
        # Use MAE or RMSE for this, because KGE and corr can also be NaN for successful but 'flat' runs.
        MAE.dropna(inplace=True)
        RMSE.dropna(inplace=True)

        js3 = MAE.index

        KGE = KGE.ix[js3]
        corr = corr.ix[js3]
        df_par = df_par.loc[:, js3]
Example #5
0
 for j in range(sim.shape[0]):
     jcomp = int(sim[j][0])
     # Get sim outpts and apply conversion factor !!
     tmp = [
         simfct[iobs] * sim[j][x] for x in range(1, simlen + 1)
     ]
     # Crop between desired time frame
     sim2 = [
         tmp[idx] for idx in range(simlen)
         if simdate[idx] >= fitbeg[iobs]
         and simdate[idx] <= fitend[iobs]
     ]
     # Metrics
     md_nse = metrics.nash_sutcliff(sim2, obs)
     md_kge = metrics.kling_gupta(sim2, obs, method='2012')
     md_rmse = metrics.rmse(sim2, obs)
     #md_bias = metrics.bias(sim2,obs)
     md_corr = metrics.corr(sim2, obs)
     md_rstd = metrics.rstd(sim2, obs)
     # Write
     f_out.write(','.join([
         str(i + 1),
         str(jcomp),
         str(md_nse),
         str(md_kge),
         str(md_rmse),
         str(md_corr),
         str(md_rstd)
     ]) + '\n')
     # Save the 'coordinates' of this sample (only the first time)
     if iobs == 0:
Example #6
0
def fit_model(df,
              formula,
              title="Full",
              fp=None,
              filename="Model",
              save=False):
    """
  Function to fit model, collect stats and save predictions and model.
  df: dataframe (needs at least 10 rows, otherwise nothing is fitted)
  formula: patsy formula used to build the design matrices
  title: title of model (Default: "Full")
  fp: File pointer; when given, the report is also written to it (Default: None)
  filename: Model and data file prefix ("Model"); used for "<filename>.pdf"
            (PRC plot), "<filename>.pkl" (model) and "<filename>.data.txt"
            (predictions)
  save: Whether to save predictions, model or both or none ["Both", "Data", "Model", False] (Default: False)

  Returns (model, res), or (None, None) when df has fewer than 10 rows.
  """
    # Every message is a single pre-formatted string so that print(...) emits
    # the same text on Python 2 and Python 3.
    if df.shape[0] < 10:
        print("Too less instances. Skipping. Make sure you have atleast 10 instances.")
        return None, None

    header = "Modelling Model[%s] with instances %s" % (title, df.shape[0])
    formula_msg = "Using formula:\n %s" % (formula,)
    print(header)
    print(formula_msg)
    print("Generating patsy matrices")
    y, X = patsy.dmatrices(formula, df, return_type="dataframe")
    print("Initializing model")
    model = Logit(y, X)
    print("Fitting model")
    res = model.fit()
    summary_msg = "%s \n%s" % (title, res.summary2())
    print(summary_msg)

    # Compute the confusion matrix and the in-sample predictions once and
    # reuse them. Besides avoiding repeated work, this keeps the predictions
    # available after res.save(..., remove_data=True) below, which strips
    # the data arrays from `res`.
    pred_table = res.pred_table()
    predicted = res.predict()
    print("Confusion Matrix: %s" % (pred_table,))

    precision = ems.precision(pred_table)
    recall = ems.recall(pred_table)
    accuracy = ems.accuracy(pred_table)
    f_score = ems.fscore_measure(pred_table)
    rmse = ems.rmse(predicted, model.endog)
    mae = ems.mae(predicted, model.endog)
    auc = ems.auc(predicted, model.endog)
    prc = ems.prc(predicted, model.endog)
    prc_filename = "%s.pdf" % filename
    plot_prc(prc, prc_filename)

    evaluation_metrics = "[Model Measures]: Confusion Matrix: %s\nRMSE: %s\tMAE: %s\tAUC: %s\nPrecision: %s\tRecall: %s\tAccuracy: %s\tF1-Score: %s\nPRC:\n%s" % (
        pred_table, rmse, mae, auc, precision, recall, accuracy, f_score,
        prc_filename)
    print(evaluation_metrics)
    save_msg = " ".join([
        "[save=%s]" % save,
        "" if save else "Not",
        "Saving Model to %s" % filename,
    ])
    print(save_msg)

    if fp is not None:
        # Mirror the report into the caller-supplied file, one message per line.
        for message in (header, formula_msg, summary_msg, evaluation_metrics,
                        save_msg):
            fp.write(message + "\n")

    save_model = save in ("Both", "Model")
    save_predictions = save in ("Both", "Data")
    if save_model:
        model_file = "%s.pkl" % filename
        res.save(model_file, remove_data=True)  # Save model
    if save_predictions:
        data_file = "%s.data.txt" % filename  # Include predictions
        print("df.index %s" % (df.index,))
        save_data(df[["from_id", "is_self_cite"]],
                  predicted,
                  filename=data_file)
    print("Done Saving")
    return model, res
Example #7
0
                # Crop between desired time frame, and account for potential gaps in the obs
                sim = [
                    tmp2[idx + 1] for idx in range(lsim)
                    if any(obst[obsnames[iobs]] == simt[idx]) == True
                ]
                #if i==21 and j==1:
                #    tmp24 = [simt[idx] for idx in range(lsim) if simt[idx] in obst[obsnames[iobs]]]
                #    print
                #    print tmp24

                # Increment cost function
                KGE[obsnames[iobs]][j - 1] = metrics.kling_gupta(
                    sim, obs[obsnames[iobs]], method='2012')
                MAE[obsnames[iobs]][j - 1] = metrics.meanabs(
                    sim, obs[obsnames[iobs]])
                RMSE[obsnames[iobs]][j - 1] = metrics.rmse(
                    sim, obs[obsnames[iobs]])

                # A few prints
                #if j==1:
                #print obsnames[iobs]
                #print np.mean(sim), np.mean(np.ma.masked_array(obs[obsnames[iobs]],np.isnan(obs[obsnames[iobs]]))), len(sim), len(obs[obsnames[iobs]])
                #print KGE[obsnames[iobs]][0], MAE[obsnames[iobs]][0], RMSE[obsnames[iobs]][0]

                if iobs == 0:
                    # -- Parameters
                    tmp3 = [tmp_par[j - 1][idx] for idx in range(1, npar + 1)]
                    with open(
                            os.getcwd() + '/' + outdir + '/' + MCname +
                            '_parameters.txt', 'a') as f_out:
                        f_out.write(
                            str(i) + ',' + str(j) + ',' + str(itot) + ',' +
Example #8
0
                tmp = np.genfromtxt(f_in,
                                    delimiter='\t',
                                    skip_header=nts + 3,
                                    unpack=True)[1]
                #print i ,j ,len(tmp)
                if len(tmp) < lobs:
                    j += 1
                    continue
                # Crop between desired time frame (based on the length of this time frame)
                sim = [tmp[idx] for idx in range(len(tmp) - ltf, len(tmp))]
                # Metrics
                md_nse = metrics.nash_sutcliff(sim, obs['Streamflow'])
                md_kge = metrics.kling_gupta(sim,
                                             obs['Streamflow'],
                                             method='2012')
                md_rmse = metrics.rmse(sim, obs['Streamflow'])
                md_bias = metrics.bias(sim, obs['Streamflow'])
                # Write
                f_out.write(','.join([
                    str(i + 1),
                    str(j + 1),
                    str(md_nse),
                    str(md_kge),
                    str(md_rmse),
                    str(md_bias)
                ]) + '\n')
                # Save the 'coordinates' of this sample
                iok.append(i)
                jok.append(j)
                j += 1