def countpix(input_raster, value=1, blk_rows=0):
    """Count the number of pixels having a specific value.

    Count the number of pixels (and the corresponding area in ha)
    having a specific value. Only the first band of the raster is
    read, block by block, so that large rasters can be processed.

    :param input_raster: Input raster file.
    :param value: Target value.
    :param blk_rows: if > 0, number of lines per block.

    :return: A dictionary with the number of pixels having the
        specified value (npix) and the total area (area, in ha).

    """

    # Read raster
    rasterR = gdal.Open(input_raster)
    rasterB = rasterR.GetRasterBand(1)

    # Make blocks
    # NOTE(review): the meaning of each blockinfo item is inferred from
    # how it is used below (offsets and sizes passed to ReadAsArray).
    blockinfo = makeblock(input_raster, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    print("Divide region in " + str(nblock) + " blocks")

    # Number of pixels with a given value
    print("Compute the number of pixels with value=" + str(value))
    npix = 0
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays. Integer division is required: a float
        # index (Python 3 "/") cannot be used to index y and ny.
        py, px = divmod(b, nblock_x)
        # Read the data
        rasterA = rasterB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Count pixels equal to the target value
        npix += int(np.count_nonzero(rasterA == value))

    # Compute area
    print("Compute the corresponding area in ha")
    gt = rasterR.GetGeoTransform()
    # gt[5] is negated to get a positive pixel height.
    # Assumes georeferenced units are metres (10000 m2 = 1 ha) -- TODO
    # confirm against the rasters used by callers.
    pix_area = gt[1] * (-gt[5])
    area = pix_area * npix / 10000

    # Results
    return {'npix': npix, 'area': area}
def emissions(input_stocks="data/AGB.tif",
              input_forest="output/forest_cover_2050.tif",
              coefficient=0.47,
              blk_rows=128):
    """Predict the carbon emissions associated to future deforestation.

    This function predicts the carbon emissions associated to future
    deforestation. Computation are done by block and can be performed
    on large geographical areas.

    A two-band virtual raster "var.vrt" (band 1 = forest, band 2 =
    stocks) is written next to ``input_forest`` with the extent and
    resolution of the forest raster.

    :param input_stocks: path to raster of biomass or carbon stocks (in
        Mg/ha).
    :param input_forest: path to forest-cover change raster
        (0=deforestation).
    :param coefficient: coefficient to convert stocks in MgC/ha (can be 1).
    :param blk_rows: if > 0, number of rows for computation by block.

    :return: emissions of carbon in MgC (rounded to an integer).

    :raises subprocess.CalledProcessError: if gdalbuildvrt fails.

    """

    # Local import so that the block does not depend on file-level imports
    import subprocess

    # Landscape variables from forest raster
    forestR = gdal.Open(input_forest)
    gt = forestR.GetGeoTransform()
    ncol = forestR.RasterXSize
    nrow = forestR.RasterYSize
    Xmin = gt[0]
    Xmax = gt[0] + gt[1] * ncol
    Ymin = gt[3] + gt[5] * nrow
    Ymax = gt[3]

    # Make vrt
    print("Make virtual raster")
    dirname = os.path.dirname(input_forest)
    output_vrt = os.path.join(dirname, "var.vrt")
    # Arguments are passed as a list (no shell) so that paths containing
    # spaces or shell metacharacters are handled correctly.
    cmd_gdalbuildvrt = [
        "gdalbuildvrt", "-overwrite", "-separate",
        "-resolution", "user",
        "-te", str(Xmin), str(Ymin), str(Xmax), str(Ymax),
        "-tr", str(gt[1]), str(-gt[5]),
        output_vrt, input_forest, input_stocks,
    ]
    subprocess.run(cmd_gdalbuildvrt, check=True)

    # Load vrt file
    stack = gdal.Open(output_vrt)

    # Make blocks
    blockinfo = makeblock(output_vrt, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    print("Divide region in " + str(nblock) + " blocks")

    # Computation by block
    # Total sum
    sum_Stocks = 0
    # Message
    print("Compute carbon emissions by block")
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays (integer division: these are indices)
        py, px = divmod(b, nblock_x)
        # Data for one block of the stack (shape = (nband,nrow,ncol))
        data = stack.ReadAsArray(x[px], y[py], nx[px], ny[py])
        data_Forest = data[0]
        data_Stocks = data[1]
        # Negative stocks are set to zero. The band NoData value is not
        # used because it differs from the NoData value returned by
        # ReadAsArray.
        data_Stocks[data_Stocks < 0] = 0
        # Sum of emitted stocks (pixels where forest == 0)
        sum_Stocks = sum_Stocks + np.sum(data_Stocks[data_Forest == 0])

    # Pixel area (in ha)
    Area = gt[1] * (-gt[5]) / 10000
    # Carbon emissions in Mg
    Carbon = sum_Stocks * coefficient * Area
    # Builtin int: the np.int alias was removed from NumPy
    Carbon = int(np.rint(Carbon))

    # Return carbon emissions
    return Carbon
def deforest(input_raster,
             hectares,
             output_file="output/fcc.tif",
             blk_rows=128):
    """Function to map the future forest-cover change.

    This function computes the future forest cover map based on (i) a
    raster of probability of deforestation (rescaled from 1 to 65535),
    and (ii) a surface (in hectares) to be deforested.

    The output raster is of type Byte with 0 for deforested pixels, 1
    for remaining forest pixels and 255 as nodata value.

    :param input_raster: raster of probability of deforestation (1 to
        65535 with 0 as nodata value).
    :param hectares: number of hectares to deforest.
    :param output_file: name of the raster file for forest cover map.
    :param blk_rows: if > 0, number of rows for block (else 256x256).

    :return: a tuple of statistics (counts, threshold, error,
        hectares) where counts is the histogram of probability values,
        threshold is the probability value at or above which pixels
        are deforested, and error is the difference (in ha) between
        the deforested area and the requested area.

    """

    # Load raster and band
    probR = gdal.Open(input_raster)
    probB = probR.GetRasterBand(1)
    gt = probR.GetGeoTransform()
    proj = probR.GetProjection()
    ncol = probR.RasterXSize
    nrow = probR.RasterYSize

    # Number of pixels to deforest
    surface_pixel = -gt[1] * gt[5]
    ndefor = int(np.around((hectares * 10000) / surface_pixel))

    # Make blocks
    blockinfo = makeblock(input_raster, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    print("Divide region in " + str(nblock) + " blocks")

    # Compute the total number of forest pixels
    print("Compute the total number of forest pixels")
    nfp = 0
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays (integer division: these are indices)
        py, px = divmod(b, nblock_x)
        # Data for one block
        data = probB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        nfp += int(np.count_nonzero(data != 0))

    # Compute the histogram of values.
    # One bucket of width 1 per probability value: bucket i covers
    # [i + 0.5, i + 1.5) and so counts pixels with value i + 1. The
    # number of buckets must therefore be 65535 for "index + 1" to be
    # a valid probability threshold.
    nvalues = 65535
    counts = probB.GetHistogram(0.5, 65535.5, nvalues, 0, 0)

    # If deforestation < forest
    if ndefor < nfp:
        # Identify threshold
        print("Identify threshold")
        # cum_counts[i] = number of pixels with value >= i + 1
        hist = np.asarray(counts, dtype=np.int64)
        cum_counts = np.cumsum(hist[::-1])[::-1]
        # Highest bucket for which at least ndefor pixels are selected
        enough = np.nonzero(cum_counts >= ndefor)[0]
        index = int(enough[-1]) if enough.size > 0 else 0
        # Minimize error
        print("Minimize error on deforested hectares")
        # Number of pixels selected when using the next threshold
        # (zero when there is no higher bucket)
        n_above = int(cum_counts[index + 1]) if index + 1 < nvalues else 0
        diff_inf = ndefor - n_above
        diff_sup = int(cum_counts[index]) - ndefor
        if diff_sup >= diff_inf:
            index = index + 1
        threshold = index + 1
    # If deforestation > forest (everything is deforested)
    else:
        index = 0
        threshold = 1

    # Raster of predictions
    print("Create a raster file on disk for forest-cover change")
    driver = gdal.GetDriverByName("GTiff")
    fccR = driver.Create(output_file, ncol, nrow, 1,
                         gdal.GDT_Byte,
                         ["COMPRESS=LZW", "PREDICTOR=2", "BIGTIFF=YES"])
    fccR.SetGeoTransform(gt)
    fccR.SetProjection(proj)
    fccB = fccR.GetRasterBand(1)
    fccB.SetNoDataValue(255)

    # Write raster of future fcc
    print("Write raster of future forest-cover change")
    ndc = 0
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays
        py, px = divmod(b, nblock_x)
        # Data for one block
        prob_data = probB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Pixels that are really deforested
        defor_mask = prob_data >= threshold
        ndc += int(np.count_nonzero(defor_mask))
        # Forest-cover change. uint8 is used because the nodata value
        # 255 does not fit in int8.
        for_data = np.full(prob_data.shape, 255, dtype=np.uint8)  # nodata
        for_data[prob_data != 0] = 1
        for_data[defor_mask] = 0
        fccB.WriteArray(for_data, x[px], y[py])

    # Compute statistics
    print("Compute statistics")
    fccB.FlushCache()  # Write cache data to disk
    fccB.ComputeStatistics(False)

    # Build overviews
    print("Build overview")
    fccR.BuildOverviews("nearest", [4, 8, 16, 32])

    # Dereference driver
    fccB = None
    del fccR

    # Estimates of error on deforested hectares
    error = (ndc * surface_pixel / 10000.0) - hectares

    # Return results
    stats = (counts, threshold, error, hectares)
    return stats
def predict(model, var_dir="data",
            input_cell_raster="output/rho.tif",
            input_forest_raster="data/forest.tif",
            output_file="output/pred_binomial_iCAR.tif",
            blk_rows=128):
    """Predict the spatial probability of deforestation from a model.

    This function predicts the spatial probability of deforestation
    from a model_binomial_iCAR model. Computation are done by block and
    can be performed on large geographical areas.

    A virtual raster "var.vrt" stacking all the .tif files of
    ``var_dir`` (sorted by name) plus ``input_cell_raster`` is written
    in ``var_dir``. The output raster is of type UInt16 with 0 as
    nodata value. The process exits with status 1 if one band of the
    stack has no NoData value.

    :param model: model_binomial_iCAR model to predict from.
    :param var_dir: directory with rasters (.tif) of explicative variables.
    :param input_cell_raster: path to raster of rho values for spatial cells.
    :param input_forest_raster: path to forest raster (1 for forest).
    :param output_file: name of the raster file to output the probability map.
    :param blk_rows: if > 0, number of rows for computation by block.

    :raises subprocess.CalledProcessError: if gdalbuildvrt fails.

    """

    # Local import so that the block does not depend on file-level imports
    import subprocess

    # Mask on forest
    fmaskR = gdal.Open(input_forest_raster)
    fmaskB = fmaskR.GetRasterBand(1)

    # Landscape variables from forest raster
    gt = fmaskR.GetGeoTransform()
    ncol = fmaskR.RasterXSize
    nrow = fmaskR.RasterYSize
    Xmin = gt[0]
    Xmax = gt[0] + gt[1] * ncol
    Ymin = gt[3] + gt[5] * nrow
    Ymax = gt[3]

    # Raster list
    raster_list = sorted(glob(var_dir + "/*.tif"))
    raster_list.append(input_cell_raster)
    # Column names: file names up to the first dot, plus the forest mask
    raster_names = [os.path.basename(f).split(".")[0] for f in raster_list]
    raster_names.append("fmask")

    # Make vrt with gdalbuildvrt
    print("Make virtual raster with variables as raster bands")
    output_vrt = var_dir + "/var.vrt"
    # Arguments are passed as a list (no shell) so that paths containing
    # spaces or shell metacharacters are handled correctly.
    cmd_gdalbuildvrt = [
        "gdalbuildvrt", "-overwrite", "-separate",
        "-resolution", "user",
        "-te", str(Xmin), str(Ymin), str(Xmax), str(Ymax),
        "-tr", str(gt[1]), str(-gt[5]),
        output_vrt,
    ] + raster_list
    subprocess.run(cmd_gdalbuildvrt, check=True)

    # Load vrt file
    stack = gdal.Open(output_vrt)
    nband = stack.RasterCount
    proj = stack.GetProjection()

    # List of nodata values
    bandND = np.zeros(nband)
    for k in range(nband):
        # The value must be checked before it is stored: None written
        # into a float array silently becomes NaN.
        nodata = stack.GetRasterBand(k + 1).GetNoDataValue()
        if nodata is None or np.isnan(nodata):
            print("NoData value is not specified for input raster file %d" % k)
            sys.exit(1)
        bandND[k] = nodata
    bandND = bandND.astype(np.float32)

    # Make blocks
    blockinfo = makeblock(output_vrt, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    print("Divide region in " + str(nblock) + " blocks")

    # Raster of predictions
    print("Create a raster file on disk for projections")
    driver = gdal.GetDriverByName("GTiff")
    Pdrv = driver.Create(output_file, ncol, nrow, 1,
                         gdal.GDT_UInt16,
                         ["COMPRESS=LZW", "PREDICTOR=2", "BIGTIFF=YES"])
    Pdrv.SetGeoTransform(gt)
    Pdrv.SetProjection(proj)
    Pband = Pdrv.GetRasterBand(1)
    Pband.SetNoDataValue(0)

    # Predict by block
    # Message
    print("Predict deforestation probability by block")
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays (integer division: these are indices)
        py, px = divmod(b, nblock_x)
        # Number of pixels
        npix = nx[px] * ny[py]
        # Data for one block of the stack (shape = (nband,nrow,ncol))
        data = stack.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Replace ND values with -9999
        for i in range(nband):
            data[i][data[i] == bandND[i]] = -9999
        # Forest mask
        fmaskA = fmaskB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        fmaskA = fmaskA.astype(np.float32)  # From uint to float
        fmaskA[fmaskA != 1] = -9999
        fmaskA = fmaskA[np.newaxis, :, :]
        # Concatenate forest mask with stack
        data = np.concatenate((data, fmaskA), axis=0)
        # Transpose and reshape to 2D array (one row per pixel)
        data = data.transpose(1, 2, 0)
        data = data.reshape(npix, nband + 1)
        # Observations without NA
        w = np.nonzero(~(data == -9999).any(axis=1))
        # Remove observations with NA
        data = data[w]
        # Predict with binomial iCAR model
        pred = np.zeros(npix)  # Initialize with nodata value (0)
        if len(w[0]) > 0:
            # Transform into a pandas DataFrame
            df = pd.DataFrame(data)
            df.columns = raster_names
            # Add fake "cell" column
            df["cell"] = 0
            # Get predictions into an array. The rho values are in the
            # second last column (the last one being the forest mask).
            p = predict_binomial_iCAR(model, new_data=df,
                                      rhos=data[:, -2])
            # Rescale and return to pred
            pred[w] = rescale(p)
        # Assign prediction to raster
        pred = pred.reshape(ny[py], nx[px])
        Pband.WriteArray(pred, x[px], y[py])

    # Compute statistics
    print("Compute statistics")
    Pband.FlushCache()  # Write cache data to disk
    Pband.ComputeStatistics(False)

    # Build overviews
    print("Build overviews")
    Pdrv.BuildOverviews("nearest", [4, 8, 16, 32])

    # Dereference driver
    Pband = None
    del Pdrv
def sample(nsamp=10000, Seed=1234, csize=10,
           var_dir="data",
           input_forest_raster="forest.tif",
           output_file="output/sample.txt",
           blk_rows=0):
    """Sample points and extract raster values.

    This function (i) randomly draw spatial points in deforested and
    forested areas and (ii) extract environmental variable values for
    each spatial point.

    A virtual raster "var.vrt" stacking all the .tif files of
    ``var_dir`` (sorted by name) is written in ``var_dir``, with the
    extent and resolution of the forest raster. The process exits with
    status 1 if one band of the stack has no NoData value.

    :param nsamp: number of random spatial points.
    :param Seed: seed for random number generator.
    :param csize: spatial cell size in km.
    :param var_dir: directory with raster data.
    :param input_forest_raster: name of the forest raster file
        (1=forest, 0=deforested) in the var_dir directory
    :param output_file: path to file to save sample points.
    :param blk_rows: if > 0, number of lines per block.

    :return: a pandas DataFrame, each row being one observation.

    :raises ValueError: if the forest raster has no deforested pixel
        or no forest pixel.
    :raises subprocess.CalledProcessError: if gdalbuildvrt fails.

    """

    # Local import so that the block does not depend on file-level imports
    import subprocess

    # Set random seed
    np.random.seed(Seed)

    # =============================================
    # Sampling pixels
    # =============================================

    print("Sample 2x" + str(nsamp) + " pixels (deforested vs. forest)")

    # Read defor raster
    forest_raster_file = os.path.join(var_dir, input_forest_raster)
    forestR = gdal.Open(forest_raster_file)
    forestB = forestR.GetRasterBand(1)

    # Make blocks
    blockinfo = makeblock(forest_raster_file, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    print("Divide region in " + str(nblock) + " blocks")

    # Number of defor/forest pixels by block
    print("Compute number of deforested and forest pixels per block")
    ndc_block = np.zeros(nblock, dtype=int)
    nfc_block = np.zeros(nblock, dtype=int)

    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays (integer division: these are indices)
        py, px = divmod(b, nblock_x)
        # Read the data
        forest = forestB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Number of deforested (0) and forest (1) pixels
        ndc_block[b] = np.count_nonzero(forest == 0)
        nfc_block[b] = np.count_nonzero(forest == 1)
    ndc = int(ndc_block.sum())
    nfc = int(nfc_block.sum())

    # Without pixels in one class, block probabilities are undefined
    if ndc == 0 or nfc == 0:
        raise ValueError("Forest raster must include both deforested (0) "
                         "and forest (1) pixels")

    # Proba of drawing a block
    print("Draw blocks at random")
    proba_block_d = ndc_block.astype(float) / ndc
    proba_block_f = nfc_block.astype(float) / nfc
    # Draw block number nsamp times
    block_draw_d = np.random.choice(range(nblock), size=nsamp,
                                    replace=True, p=proba_block_d)
    block_draw_f = np.random.choice(range(nblock), size=nsamp,
                                    replace=True, p=proba_block_f)
    # Number of times the block is drawn
    nblock_draw_d = np.bincount(block_draw_d, minlength=nblock)
    nblock_draw_f = np.bincount(block_draw_f, minlength=nblock)

    # Draw defor/forest pixels in blocks
    print("Draw pixels at random in blocks")
    # Lists to store coordinates of selected pixels. Arrays are
    # concatenated once after the loop; the empty first item keeps the
    # result well defined when nothing is selected.
    defor_parts = [np.empty(shape=(0, 2), dtype=int)]
    for_parts = [np.empty(shape=(0, 2), dtype=int)]

    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # nbdraw
        nbdraw_d = nblock_draw_d[b]
        nbdraw_f = nblock_draw_f[b]
        # Position in 1D-arrays
        py, px = divmod(b, nblock_x)
        # Read the data
        forest = forestB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Identify pixels (x/y coordinates) which are deforested
        # !! Values returned in row-major, C-style order (y/x) !!
        deforpix = np.nonzero(forest == 0)
        deforpix = np.transpose((x[px] + deforpix[1],
                                 y[py] + deforpix[0]))
        ndefor_b = len(deforpix)
        # Identify pixels (x/y coordinates) which are forested
        forpix = np.nonzero(forest == 1)
        forpix = np.transpose((x[px] + forpix[1],
                               y[py] + forpix[0]))
        nfor_b = len(forpix)
        # Sample deforested pixels
        if nbdraw_d > 0:
            if nbdraw_d < ndefor_b:
                i = np.random.choice(ndefor_b, size=nbdraw_d,
                                     replace=False)
                defor_parts.append(deforpix[i])
            else:
                # Not enough pixels: all of them are selected
                defor_parts.append(deforpix)
        # Sample forest pixels
        if nbdraw_f > 0:
            if nbdraw_f < nfor_b:
                i = np.random.choice(nfor_b, size=nbdraw_f,
                                     replace=False)
                for_parts.append(forpix[i])
            else:
                # Not enough pixels: all of them are selected
                for_parts.append(forpix)

    # =============================================
    # Compute center of pixel coordinates
    # =============================================

    print("Compute center of pixel coordinates")

    # Landscape variables from forest raster
    gt = forestR.GetGeoTransform()
    ncol = forestR.RasterXSize
    nrow = forestR.RasterYSize
    Xmin = gt[0]
    Xmax = gt[0] + gt[1] * ncol
    Ymin = gt[3] + gt[5] * nrow
    Ymax = gt[3]

    # Concatenate selected pixels (deforested first, then forest)
    select = np.concatenate(defor_parts + for_parts, axis=0)

    # Offsets and coordinates
    xOffset = select[:, 0]
    yOffset = select[:, 1]
    pts_x = (xOffset + 0.5) * gt[1] + gt[0]  # +0.5 for center of pixels
    pts_y = (yOffset + 0.5) * gt[5] + gt[3]  # +0.5 for center of pixels

    # ================================================
    # Compute cell number for spatial autocorrelation
    # ================================================

    # Cell number from region
    print("Compute number of %d x %d km spatial cells" % (csize, csize))
    csize = csize * 1000  # Transform km in m
    ncell_byrow = int(np.ceil((Xmax - Xmin) / csize))
    ncell_bycol = int(np.ceil((Ymax - Ymin) / csize))
    ncell = ncell_byrow * ncell_bycol
    print("... %d cells (%d x %d)" % (ncell, ncell_bycol, ncell_byrow))
    # I and J are the coordinates of the cells and start at zero
    print("Identify cell number from XY coordinates")
    J = ((pts_x - Xmin) / csize).astype(int)
    I = ((Ymax - pts_y) / csize).astype(int)
    cell = I * ncell_byrow + J  # Cell number starts at zero

    # =============================================
    # Extract values from rasters
    # =============================================

    # Raster list
    raster_list = sorted(glob(var_dir + "/*.tif"))

    # Make vrt with gdalbuildvrt
    # Note: Extent and resolution from forest raster!
    print("Make virtual raster with variables as raster bands")
    output_vrt = var_dir + "/var.vrt"
    # Arguments are passed as a list (no shell) so that paths containing
    # spaces or shell metacharacters are handled correctly.
    cmd_gdalbuildvrt = [
        "gdalbuildvrt", "-overwrite", "-separate",
        "-resolution", "user",
        "-te", str(Xmin), str(Ymin), str(Xmax), str(Ymax),
        "-tr", str(gt[1]), str(-gt[5]),
        output_vrt,
    ] + raster_list
    subprocess.run(cmd_gdalbuildvrt, check=True)

    # Load vrt file
    stack = gdal.Open(output_vrt)

    # List of nodata values
    nband = stack.RasterCount
    bandND = np.zeros(nband)
    for k in range(nband):
        # The value must be checked before it is stored: None written
        # into a float array silently becomes NaN.
        nodata = stack.GetRasterBand(k + 1).GetNoDataValue()
        if nodata is None:
            print("NoData value is not specified "
                  "for input raster file %s" % raster_list[k])
            sys.exit(1)
        bandND[k] = nodata

    # Numpy array to store values
    nobs = select.shape[0]
    val = np.zeros(shape=(nobs, nband), dtype=np.float32)

    # Extract raster values
    print("Extract raster values for selected pixels")
    for i in range(nobs):
        # Progress bar
        progress_bar(nobs, i + 1)
        # ReadArray for extract (one pixel, all bands); offsets are
        # converted to builtin int before being passed to GDAL
        extract = stack.ReadAsArray(int(xOffset[i]), int(yOffset[i]), 1, 1)
        val[i, :] = extract.reshape(nband, )

    # Close stack
    del stack

    # Replace NA
    # NB: values are stored as float32, so nodata values are compared
    # as float32 too
    bandND = bandND.astype(np.float32)
    for k in range(nband):
        val[val[:, k] == bandND[k], k] = np.nan

    # Add XY coordinates and cell number
    val = np.concatenate((val,
                          pts_x.reshape(nobs, 1),
                          pts_y.reshape(nobs, 1),
                          cell.reshape(nobs, 1)), axis=1)

    # =============================================
    # Export and return value
    # =============================================

    print("Export results to file " + output_file)

    # Column names: file names up to the first dot
    colname = [os.path.basename(f).split(".")[0] for f in raster_list]
    varname = ",".join(colname) + ",X,Y,cell"
    # Write to file by row
    np.savetxt(output_file, val, header=varname, fmt="%s",
               delimiter=",", comments="")

    # Convert to pandas DataFrame and return the result
    colname.extend(["X", "Y", "cell"])
    val_DF = pd.DataFrame(val, columns=colname)
    return val_DF
def validation(pred, obs, blk_rows=128):
    """Compute accuracy indices based on predicted and observed
    forest-cover change (fcc) maps.

    Compute the Overall Accuracy, the Figure of Merit, the
    Specificity, the Sensitivity, the True Skill Statistics and the
    Cohen's Kappa from a confusion matrix built on predictions
    vs. observations.

    The first band of each raster is read block by block, using the
    blocks computed from the raster of predictions. Pixels with value
    0 are the "1" class of the confusion matrix and pixels with value
    1 are the "0" class; pixels with other values are ignored. The
    confusion matrix is printed.

    :param pred: raster of predicted fcc.
    :param obs: raster of observed fcc.
    :param blk_rows: if > 0, number of rows for block (else 256x256).

    :return: a dictionary of accuracy indices rounded to two decimals
        (keys: OA, FOM, Sen, Spe, TSS, K).

    """

    # Load raster and band
    predR = gdal.Open(pred)
    predB = predR.GetRasterBand(1)
    obsR = gdal.Open(obs)
    obsB = obsR.GetRasterBand(1)

    # Make blocks
    blockinfo = makeblock(pred, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    print("Divide region in " + str(nblock) + " blocks")

    # Initialize the confusion matrix
    n00 = 0.0
    n10 = 0.0
    n01 = 0.0
    n11 = 0.0

    # Compute the confusion matrix
    print("Compute the confusion matrix")
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        progress_bar(nblock, b + 1)
        # Position in 1D-arrays (integer division: these are indices)
        py, px = divmod(b, nblock_x)
        # Data for one block
        df_pred = predB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        df_obs = obsB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Class "1" is raster value 0, class "0" is raster value 1
        pred1 = df_pred == 0
        pred0 = df_pred == 1
        obs1 = df_obs == 0
        obs0 = df_obs == 1
        # Update confusion matrix
        n00 = n00 + np.sum(pred0 & obs0)
        n10 = n10 + np.sum(pred1 & obs0)
        n01 = n01 + np.sum(pred0 & obs1)
        n11 = n11 + np.sum(pred1 & obs1)

    # Dereference driver
    predB = None
    del predR
    obsB = None
    del obsR

    # Print confusion matrix
    mat = pd.DataFrame({
        "obs0": pd.Series([n00, n10], index=["pred0", "pred1"]),
        "obs1": pd.Series([n01, n11], index=["pred0", "pred1"])
    })
    print(mat)

    # Accuracy indices
    print("Compute accuracy indices")
    N = n11 + n10 + n00 + n01
    OA = (n11 + n00) / N
    FOM = n11 / (n11 + n10 + n01)
    Sensitivity = n11 / (n11 + n01)
    Specificity = n00 / (n00 + n10)
    TSS = Sensitivity + Specificity - 1
    Observed_accuracy = (n11 + n00) / N
    Expected_accuracy = ((n11 + n10) * ((n11 + n01) / N) +
                         (n00 + n01) * ((n00 + n10) / N)) / N
    Kappa = (Observed_accuracy - Expected_accuracy) / (1 - Expected_accuracy)

    r = {
        "OA": round(OA, 2),
        "FOM": round(FOM, 2),
        "Sen": round(Sensitivity, 2),
        "Spe": round(Specificity, 2),
        "TSS": round(TSS, 2),
        "K": round(Kappa, 2)
    }

    return r