def get_all_eval_measures(res, endog, include_prc=False):
    """
    Gather evaluation measures for a fitted model into a single dictionary.

    res: fitted results object; must provide predict() and the llf, aic,
        bic, prsquared and df_model attributes
    endog: observed labels that the predictions are scored against
    include_prc: when true, also store the output of ems.prc under "prc"
        (Default: False)

    Returns a dict keyed by measure name.
    """
    scores = res.predict()
    confusion = ems.cm(scores, endog)
    measures = {
        "precision": ems.precision(confusion),
        "recall": ems.recall(confusion),
        "accuracy": ems.accuracy(confusion),
        "f_score": ems.fscore_measure(confusion),
        "rmse": ems.rmse(scores, endog),
        "mae": ems.mae(scores, endog),
        "auc": ems.auc(scores, endog),
        "llf": res.llf,
        "aic": res.aic,
        "bic": res.bic,
        "prsquared": res.prsquared,
        "df_model": res.df_model,
    }

    # Cells of the flattened confusion matrix, with 1 as the positive label.
    tn, fp, fn, tp = (float(cell) for cell in confusion.flatten())
    measures.update({"tn": tn, "fn": fn, "fp": fp, "tp": tp})

    print("In eval measures function.")
    if include_prc:
        # Include the precision recall values
        measures["prc"] = ems.prc(scores, endog, float_precision=3)
    return measures
def get_all_eval_measures(predict, endog, include_prc=False):
    """
    Gather evaluation measures for a set of predictions into one dictionary.

    predict: predicted values, scored against endog
    endog: observed labels
    include_prc: when true, also store the outputs of ems.prc and ems.roc
        under "prc" and "roc" (Default: False)

    Returns a dict keyed by measure name. "tpr" and "fpr" are NaN when
    their denominator (tp + fn, respectively fp + tn) is zero.
    """
    measures = {}
    pred_table = ems.cm(predict, endog)
    measures["precision"] = ems.precision(pred_table)
    measures["recall"] = ems.recall(pred_table)
    measures["accuracy"] = ems.accuracy(pred_table)
    measures["f_score"] = ems.fscore_measure(pred_table)
    measures["rmse"] = ems.rmse(predict, endog)
    measures["mae"] = ems.mae(predict, endog)
    measures["auc"] = ems.auc(predict, endog)

    # Cells of the flattened confusion matrix, with 1 as the positive label.
    tn, fp, fn, tp = map(float, pred_table.flatten())
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp

    # The cells are plain Python floats, so dividing by a zero total would
    # raise ZeroDivisionError; report NaN for an undefined rate instead.
    positives = tp + fn
    negatives = fp + tn
    measures["tpr"] = tp / positives if positives else float("nan")
    measures["fpr"] = fp / negatives if negatives else float("nan")

    print("In eval measures function.")
    if include_prc:
        print("Generating PRC AND ROC")
        # Include the precision recall values
        measures["prc"] = ems.prc(predict, endog, float_precision=3)
        measures["roc"] = ems.roc(predict, endog, float_precision=3)
    return measures
def func3(x):
    """Score ``x`` with metrics.rmse against the second column of Obs[oname2]."""
    candidate = np.asarray(x)
    reference = Obs[oname2].iloc[:, 1]
    return metrics.rmse(candidate, reference)
# return sp.integrate.quad(interp1d(d,y,kind='cubic'),d1,d2)/(d2-d1) # return UnivariateSpline(d,y,s=0).integral(d1,d2)/(d2-d1) return splint(d1, d2, splrep(d, y, k=min(len(d) - 1, 3), s=0)) / (d2 - d1) # return sp.integrate.quad(interp1d.splrep(d,y,k=len(d)-1,s=0),d1,d2)[0]/(d2-d1) obs = Obs[oname2].iloc[:, 1:].apply(func, axis=1) #print len(obs) KGE.loc[js2[j], oname] = metrics.kling_gupta(sim, obs, method='2012') MAE.loc[js2[j], oname] = metrics.meanabs(sim, obs) RMSE.loc[js2[j], oname] = metrics.rmse(sim, obs) corr.loc[js2[j], oname] = metrics.corr(sim, obs) if oname == outnames[0]: itot += nj # Clean the metrics dataframe to include only the successful runs common to all obs # Use MAE or RMSE, because KGE and corr can have NaN only for 'flat' succesful runs MAE.dropna(inplace=True) RMSE.dropna(inplace=True) js3 = MAE.index KGE = KGE.ix[js3] corr = corr.ix[js3] df_par = df_par.loc[:, js3]
for j in range(sim.shape[0]): jcomp = int(sim[j][0]) # Get sim outpts and apply conversion factor !! tmp = [ simfct[iobs] * sim[j][x] for x in range(1, simlen + 1) ] # Crop between desired time frame sim2 = [ tmp[idx] for idx in range(simlen) if simdate[idx] >= fitbeg[iobs] and simdate[idx] <= fitend[iobs] ] # Metrics md_nse = metrics.nash_sutcliff(sim2, obs) md_kge = metrics.kling_gupta(sim2, obs, method='2012') md_rmse = metrics.rmse(sim2, obs) #md_bias = metrics.bias(sim2,obs) md_corr = metrics.corr(sim2, obs) md_rstd = metrics.rstd(sim2, obs) # Write f_out.write(','.join([ str(i + 1), str(jcomp), str(md_nse), str(md_kge), str(md_rmse), str(md_corr), str(md_rstd) ]) + '\n') # Save the 'coordinates' of this sample (only the first time) if iobs == 0:
def fit_model(df, formula, title="Full", fp=None, filename="Model", save=False):
    """
    Fit a Logit model, report its evaluation measures and optionally save
    the fitted model and/or its predictions.

    df: dataframe (needs at least 10 rows; the "from_id" and "is_self_cite"
        columns are required when predictions are saved)
    formula: patsy formula used to build the design matrices
    title: title of model (Default: "Full")
    fp: File pointer that receives a copy of the report (Default: None)
    filename: Model and data file prefix ("Model"); also names the
        "<filename>.pdf" file that is always handed to plot_prc
    save: Whether to save predictions, model or both or none
        ["Both", "Data", "Model", False] (Default: False)

    Returns (model, res), or (None, None) when df has fewer than 10 rows.
    """
    n_rows = df.shape[0]
    if n_rows < 10:
        print("Too less instances. Skipping. Make sure you have atleast 10 instances.")
        return None, None

    # Report lines are built once so the stdout copy and the fp copy match.
    header_msg = "Modelling Model[%s] with instances %s" % (title, n_rows)
    formula_msg = "Using formula:\n %s" % (formula)
    print(header_msg)
    print(formula_msg)

    print("Generating patsy matrices")
    y, X = patsy.dmatrices(formula, df, return_type="dataframe")
    print("Initializing model")
    model = Logit(y, X)
    print("Fitting model")
    res = model.fit()

    summary_msg = "%s \n%s" % (title, res.summary2())
    print(summary_msg)

    # Compute the confusion matrix and the in-sample predictions once and
    # reuse them for every measure instead of recomputing them per call.
    pred_table = res.pred_table()
    predictions = res.predict()
    print("Confusion Matrix: %s" % (pred_table,))

    precision = ems.precision(pred_table)
    recall = ems.recall(pred_table)
    accuracy = ems.accuracy(pred_table)
    f_score = ems.fscore_measure(pred_table)
    rmse = ems.rmse(predictions, model.endog)
    mae = ems.mae(predictions, model.endog)
    auc = ems.auc(predictions, model.endog)
    prc = ems.prc(predictions, model.endog)
    prc_filename = "%s.pdf" % filename
    plot_prc(prc, prc_filename)

    evaluation_metrics = (
        "[Model Measures]: Confusion Matrix: %s\n"
        "RMSE: %s\tMAE: %s\tAUC: %s\n"
        "Precision: %s\tRecall: %s\tAccuracy: %s\tF1-Score: %s\n"
        "PRC:\n%s"
    ) % (pred_table, rmse, mae, auc, precision, recall, accuracy, f_score,
         prc_filename)
    print(evaluation_metrics)

    save_msg = " ".join([
        "[save=%s]" % save,
        "" if save else "Not",
        "Saving Model to %s" % filename,
    ])
    print(save_msg)

    if fp is not None:
        # One line-terminated entry per report item.
        report = [header_msg, formula_msg, summary_msg, evaluation_metrics,
                  save_msg]
        fp.write("\n".join(report) + "\n")

    wants_model = save in ("Both", "Model")
    wants_data = save in ("Both", "Data")
    if wants_model:
        model_file = "%s.pkl" % filename
        res.save(model_file, remove_data=True)  # Save model
    if wants_data:
        data_file = "%s.data.txt" % filename
        # Include predictions
        print("df.index %s" % (df.index,))
        save_data(df[["from_id", "is_self_cite"]], predictions,
                  filename=data_file)
    # NOTE(review): printed unconditionally here; confirm this message was
    # not meant to be limited to the data-saving branch.
    print("Done Saving")
    return model, res
# Crop between desired time frame, and account for potential gaps in the obs sim = [ tmp2[idx + 1] for idx in range(lsim) if any(obst[obsnames[iobs]] == simt[idx]) == True ] #if i==21 and j==1: # tmp24 = [simt[idx] for idx in range(lsim) if simt[idx] in obst[obsnames[iobs]]] # print # print tmp24 # Increment cost function KGE[obsnames[iobs]][j - 1] = metrics.kling_gupta( sim, obs[obsnames[iobs]], method='2012') MAE[obsnames[iobs]][j - 1] = metrics.meanabs( sim, obs[obsnames[iobs]]) RMSE[obsnames[iobs]][j - 1] = metrics.rmse( sim, obs[obsnames[iobs]]) # A few prints #if j==1: #print obsnames[iobs] #print np.mean(sim), np.mean(np.ma.masked_array(obs[obsnames[iobs]],np.isnan(obs[obsnames[iobs]]))), len(sim), len(obs[obsnames[iobs]]) #print KGE[obsnames[iobs]][0], MAE[obsnames[iobs]][0], RMSE[obsnames[iobs]][0] if iobs == 0: # -- Parameters tmp3 = [tmp_par[j - 1][idx] for idx in range(1, npar + 1)] with open( os.getcwd() + '/' + outdir + '/' + MCname + '_parameters.txt', 'a') as f_out: f_out.write( str(i) + ',' + str(j) + ',' + str(itot) + ',' +
tmp = np.genfromtxt(f_in, delimiter='\t', skip_header=nts + 3, unpack=True)[1] #print i ,j ,len(tmp) if len(tmp) < lobs: j += 1 continue # Crop between desired time frame (based on the length of this time frame) sim = [tmp[idx] for idx in range(len(tmp) - ltf, len(tmp))] # Metrics md_nse = metrics.nash_sutcliff(sim, obs['Streamflow']) md_kge = metrics.kling_gupta(sim, obs['Streamflow'], method='2012') md_rmse = metrics.rmse(sim, obs['Streamflow']) md_bias = metrics.bias(sim, obs['Streamflow']) # Write f_out.write(','.join([ str(i + 1), str(j + 1), str(md_nse), str(md_kge), str(md_rmse), str(md_bias) ]) + '\n') # Save the 'coordinates' of this sample iok.append(i) jok.append(j) j += 1