def stats(modelgrid, inventory, dx=100., Nsamp=None, method='nearest',
          extent='inventory', bins=None, runtests=True, showplots=True,
          saveplots=False, filepath=None):
    """Run a suite of tests for models whose output is a probability or index
    that varies between 0 and 1.

    The inventory polygons are sampled into positive ("yes") and negative
    ("no") points, the model grid is evaluated at those points and, when
    requested, a set of scores and diagnostic plots is produced.

    :param modelgrid: Grid2D object of model results
    :param inventory: full file path to shapefile of inventory, must be in
        geographic coordinates, WGS84
    :type inventory: string
    :param dx: Approximate sample spacing in meters, overwritten if Nsamp is
        defined
    :type dx: float
    :param Nsamp: Total number of samples desired (passed straight through to
        pointsFromShapes)
    :type Nsamp: integer
    :param method: how model values are read at the sample points:
        'nearest' picks the closest grid node, anything else ('linear',
        'cubic') is passed to interp2d as ``kind``
    :type method: string
    :param extent: extent to include in sampling - 'inventory' or 'model' or
        custom bounds as tuple (xmin, ymin, xmax, ymax) - in lats and lons
    :param bins: increasing bin edges used for the binned and threshold
        statistics. If None, bins = [0, 0.2, 0.4, 0.6, 0.8, 1.]. Each bin is
        half open, [lo, hi), except the last one which also includes its
        upper edge
    :param runtests: if true, run the statistical tests; otherwise only the
        sampled values are returned
    :param showplots: if true, display the plots (only when tests are run)
    :param saveplots: if true, save the plots as PDF (only when tests are run)
    :param filepath: Directory for saved plots; if None, the current working
        directory is used. Files are named with test name and time stamp
    :returns:
        * yespoints: coordinates of positive sample locations
        * nopoints: coordinates of negative sample locations
        * modelvalyes: model output values corresponding to yespoints
        * modelvalno: model output values corresponding to nopoints
        * results: dictionary of results of statistical tests, empty if the
          tests were not run. Keys: 'Occ_nonocc' (dict), 'SRC' (dict),
          'ROC' (dict with 'fpr', 'tpr', 'thresholds'), 'AUC_ROC' (float),
          'Log_loss' (float), 'GFC' (dict), 'Pred_vs_Obs' (dict),
          'Brier' (float), 'Brier_no' (float), 'Brier_yes' (float)
    """
    plt.close('all')

    # Read everything needed from the inventory, then release the file handle.
    with fiona.collection(inventory, 'r') as f:
        shapes = list(f)
        bxmin, bymin, bxmax, bymax = f.bounds

    gdict = modelgrid.getGeoDict()
    # Only resolve the keyword forms; a custom bounds tuple is used as given.
    if isinstance(extent, str):
        if extent == 'model':
            extent = gdict.xmin, gdict.ymin, gdict.xmax, gdict.ymax
        elif extent == 'inventory':
            extent = bxmin, bymin, bxmax, bymax

    yespoints, nopoints, _xvar, _yvar, _pshapes, _proj = pointsFromShapes(
        shapes, extent, dx=dx, Nsamp=Nsamp)
    yesptx = [pt[0] for pt in yespoints]
    yespty = [pt[1] for pt in yespoints]
    noptx = [pt[0] for pt in nopoints]
    nopty = [pt[1] for pt in nopoints]

    # Get values of model at those points
    lons = np.linspace(gdict.xmin, gdict.xmax, gdict.nx)
    lats = np.linspace(gdict.ymax, gdict.ymin, gdict.ny)
    data = modelgrid.getData()  # fetched once instead of once per point

    if method.lower() == 'nearest':
        def sample(xs, ys):
            # Closest grid node in each direction.
            return [data[np.abs(lats - y).argmin(), np.abs(lons - x).argmin()]
                    for x, y in zip(xs, ys)]
    else:
        # NOTE(review): interp2d is deprecated and was removed in SciPy 1.14;
        # this branch needs a replacement interpolator on newer SciPy.
        func = interpolate.interp2d(lons, lats, data, kind=method.lower())

        def sample(xs, ys):
            # interp2d hands back a one-element array for scalar input.
            return [float(np.ravel(func(x, y))[0]) for x, y in zip(xs, ys)]

    # replace nan with zeros
    modelvalyes = np.nan_to_num(np.array(sample(yesptx, yespty)))
    modelvalno = np.nan_to_num(np.array(sample(noptx, nopty)))

    # Now run the desired tests and make the desired plots
    results = {}
    if runtests:
        n_yes = len(yespoints)
        n_no = len(nopoints)
        N = n_yes + n_no
        # Observed class (1 = occurrence) and model score for every sample,
        # shared by the ROC, AUC and success-rate computations.
        truth = np.concatenate((np.ones(n_yes), np.zeros(n_no)))
        scores = np.concatenate((modelvalyes, modelvalno))

        # Brier score
        yessum = np.sum((modelvalyes - 1) ** 2)
        nosum = np.sum(modelvalno ** 2)
        results['Brier_yes'] = yessum / len(modelvalyes)
        results['Brier_no'] = nosum / len(modelvalno)
        results['Brier'] = (yessum + nosum) / N
        print((
            'Brier scores: overall %0.3f\nBrier_yes score: %0.3f\nBrier_no score %0.3f'
            % (results['Brier'], results['Brier_yes'], results['Brier_no'])))

        # Logarithmic score. Clip away from both 0 and 1 so that neither
        # log(p) nor log(1 - p) can become -inf.
        eps = 1.e-15
        tempyes = np.clip(modelvalyes, eps, 1. - eps)
        tempno = np.clip(modelvalno, eps, 1. - eps)
        results['Log_loss'] = -(np.sum(np.log(tempyes)) +
                                np.sum(np.log(1. - tempno))) / N
        print(('Log loss score: %0.3f' % (results['Log_loss'], )))

        if bins is None:
            bins = [0, 0.2, 0.4, 0.6, 0.8, 1.]
        binvec = [lo + (hi - lo) / 2 for lo, hi in zip(bins[:-1], bins[1:])]

        # np.histogram keeps samples that fall exactly on a bin edge
        # (e.g. the zeros produced by nan_to_num above).
        yes_counts, _ = np.histogram(modelvalyes, bins=bins)
        no_counts, _ = np.histogram(modelvalno, bins=bins)
        overall_tot = len(modelvalyes) + len(modelvalno)

        # Fraction of samples in each bin that are occurrences; NaN marks an
        # empty bin so the value stays numeric for plotting.
        observed = [float(ny) / (ny + nn) if (ny + nn) != 0 else float('nan')
                    for ny, nn in zip(yes_counts, no_counts)]
        percyes = (yes_counts / float(overall_tot) * 100.).tolist()
        percno = (no_counts / float(overall_tot) * 100.).tolist()

        plt.ioff()

        # Predicted vs. Observed ratios
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(binvec, observed, '-o')
        ax.plot([0] + binvec, [0] + binvec, '--', color='gray')
        ax.set_xlabel('Expected ratio')
        ax.set_ylabel('Observed ratio')
        ax.set_xlim([bins[0], bins[-1]])
        ax.set_title('Predicted vs. Observed')
        results['Pred_vs_Obs'] = {'binvec': binvec, 'observed': observed}

        # Ground failure occurrence/nonoccurrence
        fig1 = plt.figure()
        ax1 = fig1.add_subplot(111)
        wid = (bins[1] - bins[0]) / 2.5
        left_edges = np.array(bins[:-1])
        rects1 = ax1.bar(left_edges, percyes, width=wid)
        rects2 = ax1.bar(left_edges + wid, percno, width=wid, color='r')
        ax1.set_xlabel('Predicted susceptibility range')
        ax1.set_ylabel('% of samples')
        ax1.legend((rects1[0], rects2[0]), ('Occurrence', 'Nonoccurrence'))
        ax1.set_title('Occurrence vs. Nonoccurrence')
        results['Occ_nonocc'] = {
            'bins': bins,
            'percyes': percyes,
            'percno': percno
        }

        # Ground failure capture for various thresholds
        gfc = [np.sum(modelvalyes > val) / float(n_yes) for val in bins]
        fig2 = plt.figure()
        ax2 = fig2.add_subplot(111)
        ax2.plot(bins, gfc, 'o-')
        ax2.set_xlabel('Threshold')
        ax2.set_ylabel(r'%GFC')
        ax2.set_title('Ground Failure Capture')
        results['GFC'] = {'thresholds': bins, 'gfc': gfc}

        # ROC curves
        fpr, tpr, thresholds = roc_curve(truth, scores)
        fig3 = plt.figure()
        ax3 = fig3.add_subplot(111)
        ax3.plot(fpr, tpr)
        ax3.set_xlabel('False positive rate')
        ax3.set_ylabel('True positive rate')
        ax3.set_xlim([0, 1.])
        ax3.set_ylim([0, 1.])
        ax3.plot(fpr, fpr, '--', color='gray')
        ax3.set_title('ROC curve')
        # Store the ROC curve itself (not a copy of the GFC results).
        results['ROC'] = {'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds}
        results['AUC_ROC'] = roc_auc_score(truth, scores)
        print(('AUC_ROC: %0.3f' % (results['AUC_ROC'], )))
        ax3.text(0.8, 0.2, 'AUC: %0.3f' % results['AUC_ROC'])

        # Success rate curves: share of all occurrences whose model value
        # lies below each threshold.
        sucbin = np.linspace(0, 1., 100)
        prop = [np.sum(truth[scores < val]) / n_yes for val in sucbin]
        fig4 = plt.figure()
        ax4 = fig4.add_subplot(111)
        ax4.plot(sucbin, prop)
        # NOTE(review): the x label and the title look swapped; left as is.
        ax4.set_xlabel('Success Rate Curve')
        ax4.set_ylabel('Proportion of actual occurrences')
        ax4.set_title('Proportion of Study Area')
        AUC = auc(sucbin, prop)
        print(('AUC_SRC: %0.3f' % AUC))
        ax4.text(0.8, 0.2, 'AUC: %0.3f' % AUC)
        ax4.set_xlim([0, 1.])
        ax4.set_ylim([0, 1.])
        results['SRC'] = {'xvals': sucbin, 'proportion': prop, 'auc': AUC}

        # Figures only exist when the tests were run, so plot output is
        # handled inside this branch.
        if showplots:
            plt.show()
        if saveplots:
            if filepath is None:
                filepath = os.getcwd()
            import datetime
            time1 = datetime.datetime.now(
                datetime.timezone.utc).strftime('%d%b%Y_%H%M')
            fig.savefig(
                os.path.join(filepath, 'Pred_vs_obs_%s.pdf' % (time1, )))
            fig1.savefig(
                os.path.join(filepath, 'Occ_nonocc_%s.pdf' % (time1, )))
            fig2.savefig(os.path.join(filepath, 'GFC_%s.pdf' % (time1, )))
            fig3.savefig(os.path.join(filepath, 'ROC_%s.pdf' % (time1, )))
            fig4.savefig(os.path.join(filepath, 'SRC_%s.pdf' % (time1, )))

    return yespoints, nopoints, modelvalyes, modelvalno, results