def importRaster(rasterpath):
    """Round-trip a multiband raster through an indexed numpy object saved to disk."""
    d = None
    try:
        print(os.getcwd())
        d = gdal.Open(rasterpath)
        b = gdalIO.raster_dataset_to_indexed_numpy(d, os.path.basename(rasterpath), maxbands=10,
                                                   bandLocation="bycolumn", nodata=0)
        print("saving object to disk")
        fileIO.save_object(os.getcwd() + "/multibandtest", b)
        print("loading object from disk")
        c = fileIO.load_object(os.getcwd() + "/multibandtest")
        return b, c
    except Exception as e:
        print(e)
    finally:
        if d is not None:
            d = None  # release the gdal dataset
def importSingleBand(rasterpath):
    """Round-trip a single raster band through an indexed numpy object saved to disk."""
    d = None
    band = None
    try:
        d = gdal.Open(rasterpath)
        band = d.GetRasterBand(1)
        print("exporting to indexed numpy array")
        a = gdalIO.single_band_to_indexed_numpy(band, nodata=0)
        print("saving object to disk")
        fileIO.save_object(os.getcwd() + "/singlebandtest", a)
        print("loading object from disk")
        c = fileIO.load_object(os.getcwd() + "/singlebandtest")
        return a, c
    except Exception as e:
        print(e)
    finally:
        if band is not None:
            band = None  # release the band before the dataset
        if d is not None:
            d = None
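# Hedged usage sketch for the two round-trip helpers above. The raster path is a
# hypothetical example, not a file known to ship with the project; both helpers
# return the freshly exported object together with its reloaded pickle copy.
def _roundtrip_demo(rasterpath="/vagrant/code/pysdss/data/input/test_raster.tiff"):
    b, b_reloaded = importRaster(rasterpath)
    a, a_reloaded = importSingleBand(rasterpath)
    # a cheap sanity check: each reloaded pickle should have the same type as the original
    print(type(b) == type(b_reloaded), type(a) == type(a_reloaded))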
def test_dense_berry_work():
    # 1 create a unique id and set the output folder
    folder = "/vagrant/code/pysdss/data/output/text/"
    id = utils.create_uniqueid()
    if not os.path.exists(folder + id):
        os.mkdir(folder + id)
    outfolder = folder + id + "/"

    file = "/vagrant/code/pysdss/data/input/2016_06_17.csv"
    file = shutil.copy(file, outfolder + "/" + id + "_keep.csv")  # copy to the output directory

    # 5 set data properties: correct the field names if necessary
    usecols = ["%Dataset_id", " Row", " Raw_fruit_count", " Visible_fruit_per_m", " Latitude", " Longitude",
               "Harvestable_A", "Harvestable_B", "Harvestable_C", "Harvestable_D"]
    new_column_names = ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

    # 6 set data properties: automatically calculate the average distance between points after checking for
    # duplicate points (the user can also set an average distance from the web application)
    df = utils.remove_duplicates(file, [" Latitude", " Longitude"])
    df.to_csv(file, index=False)
    del df

    # 7 calculate statistics along the rows
    id, stats = berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None,
                                    grid=None, rowdirection="x", area_multiplier=2, filterzero=False,
                                    project="32611", nodata=-1,
                                    force_interpolation=False)  # force_interpolation=False skips the interpolation

    # 8 interpret the statistics
    fileIO.save_object(outfolder + "/_stats", stats)
    # stats = fileIO.load_object(folder + str(id) + "/_stats")
    interpret_result(stats, outfolder)

#test_dense_berry_work()
def test_berry_work2():
    # 1 create a unique id and set the output folder
    folder = "/vagrant/code/pysdss/data/output/text/"
    id = utils.create_uniqueid()

    try:
        # 2 save the id in the database
        dt.create_process(dt.default_connection, id, "claudio", dt.geotypes.interpolation.name)

        # 3-4 download the point data from the database and create a new folder
        # TODO: create a function to read from the database
        if not os.path.exists(folder + id):
            os.mkdir(folder + id)
        outfolder = folder + id + "/"

        file = "/vagrant/code/pysdss/data/input/2016_06_17.csv"
        file = shutil.copy(file, outfolder + "/" + id + "_keep.csv")  # copy to the output directory

        ##########
        # 5 set data properties: correct the field names if necessary
        # these names are mandatory for the current implementation
        usecols = ["%Dataset_id", " Row", " Raw_fruit_count", " Visible_fruit_per_m", " Latitude", " Longitude",
                   "Harvestable_A", "Harvestable_B", "Harvestable_C", "Harvestable_D"]
        new_column_names = ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

        # 6 set data properties: automatically calculate the average distance between points after checking for
        # duplicate points (the user can also set an average distance from the web application)
        df = utils.remove_duplicates(file, [" Latitude", " Longitude"])
        df.to_csv(file, index=False)
        del df
        # avdist = utils.average_point_distance(file, " Longitude", " Latitude", " Row", direction="x",
        #                                       remove_duplicates=False)

        # 7 calculate statistics along the rows
        id, stats = berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None,
                                        grid=None, rowdirection="x", area_multiplier=2, filterzero=False,
                                        project="32611", nodata=-1, force_interpolation=True)

        # 8 interpret the statistics
        fileIO.save_object(outfolder + "/_stats", stats)
        # stats = fileIO.load_object(folder + str(id) + "/_stats")
        interpret_result(stats, outfolder)

        # 9 save the operation state to the database
        js = json.dumps({
            "result": [
                {"type": "csvtable", "name": "clorgrade_a_table", "path": outfolder + "/_a_output.csv"},
                {"type": "csvtable", "name": "clorgrade_b_table", "path": outfolder + "/_b_output.csv"},
                {"type": "csvtable", "name": "clorgrade_c_table", "path": outfolder + "/_c_output.csv"},
                {"type": "csvtable", "name": "clorgrade_d_table", "path": outfolder + "/_d_output.csv"}
            ],
            "error": {}
        })
        dt.update_process(dt.default_connection, id, "claudio", dt.geomessages.completed.name, js)

        #### use geostatistics to get the rasterized image (indicator kriging?)

    except Exception as e:
        js = json.dumps({"result": [], "error": {"message": str(e)}})
        dt.update_process(dt.default_connection, id, "claudio", dt.geomessages.error.name, js)
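# Hedged sketch: how a downstream consumer might read the process-state JSON that
# test_berry_work2() stores in the database. Only the payload shape built above is
# assumed; how the string is fetched back from the database is outside this file.
def _list_result_csvs(js_string):
    state = json.loads(js_string)
    if state["error"]:
        raise RuntimeError(state["error"].get("message", "processing failed"))
    # return the csv tables recorded for the four colorgrades
    return [r["path"] for r in state["result"] if r["type"] == "csvtable"]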
def berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None, grid=None,
                        rowdirection="x", area_multiplier=2, filterzero=False, project="32611", buffersize=0.25,
                        nodata=-1, force_interpolation=False):
    """
    Workflow for the direct interpolation of high- and low-density colorgrade data.
    For high-density data the statistics are calculated directly from the vector data.

    The function returns a dictionary with statistics in the form
        {"colorgrade": {row: [0.031, 0.029, 93.0, 83.75, 9.25, 118339.1, 318.11, 29.281, 213405.12, 573.66968,
                              61.072674], ...}, ...}
    where the list values are: average, std, total area, total nonzero area, total zero area,
    total raw fruit count, average raw fruit count, std raw fruit count,
    total visible fruit per m, average visible fruit per m, std visible fruit per m.

    :param id: the unique id for this operation
    :param file: the csv file with the data
    :param usecols: a list with the original field names
    :param new_column_names: a list with the new field names; in the current implementation the names can only be
        ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]
    :param outfolder: the folder for the output
    :param average_point_distance: used to decide how to rasterize the rows and as the search radius when
        the inverse distance interpolation is applied
    :param grid: the interpolation grid; if None the grid will be calculated from the data boundaries;
        the grid format is {'minx': , 'maxx': , 'miny': , 'maxy': , 'delty': , 'deltx': }
    :param rowdirection: the direction of the vineyard rows, default "x"; pass any other string otherwise
    :param area_multiplier: used to increase the interpolation grid resolution; use 2 to halve the pixel size
    :param filterzero: True to filter out zeros from the data
    :param project: epsg string for reprojection
    :param buffersize: the buffer size for the buffered polyline
    :param nodata: the value to assign to nodata pixels
    :param force_interpolation: True if interpolation should also be carried out when average_point_distance
        is under the threshold
    :return: a tuple (id, stats) where stats is the dictionary described above
    """

    # this is the hardcoded threshold under which data is considered dense
    threshold = 0.5

    ####### check the inputs
    if new_column_names != ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]:
        raise ValueError('With the current implementation column names must be '
                         '["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]')
    if filterzero:
        raise NotImplementedError("filtering out zeros is not implemented")
    #########

    # set the path to the folder with the interpolation settings
    jsonpath = os.path.join(os.path.dirname(__file__), '../../..', "pysdss/experiments/interpolation/")
    jsonpath = os.path.normpath(jsonpath)

    # open the file and fix the column names
    df = pd.read_csv(file, usecols=usecols)
    df.columns = new_column_names

    if project:
        print("reprojecting data")
        west, north = utils2.reproject_coordinates(df, "epsg:" + str(project), "lon", "lat", False)
        filter.set_field_value(df, west, "lon")
        filter.set_field_value(df, north, "lat")
        # overwrite the input file  # todo check this is ok when there is no filtering
        df.to_csv(file, index=False)

    if not average_point_distance:
        print("calculating average distance")
        average_point_distance = utils.average_point_distance(file, "lon", "lat", "row", direction=rowdirection,
                                                              rem_duplicates=True, operation="mean")

    # set the interpolation radius based on the threshold and the average point distance
    radius = "0.8" if average_point_distance <= threshold else str(average_point_distance * 2)

    print("defining interpolation grid")
    # define the interpolation grid
    if grid is None:  # need a grid
        minx = math.floor(df['lon'].min())
        maxx = math.ceil(df['lon'].max())
        miny = math.floor(df['lat'].min())
        maxy = math.ceil(df['lat'].max())
        delty = maxy - miny  # height
        deltx = maxx - minx  # width
    else:  # the user passed a grid object
        minx = grid['minx']
        maxx = grid['maxx']
        miny = grid['miny']
        maxy = grid['maxy']
        delty = grid['delty']  # height
        deltx = grid['deltx']  # width

    '''
    if filterzero:
        # todo find the best way to filter multiple columns
        # keep, discard = filter.filter_byvalue(df, 0, ">", colname="d")
    else:
        keep = df
        discard = None
    if discard:
        discard.to_csv(folder + id + "_discard.csv", index=False)
    '''

    # open the model for creating gdal virtual files
    with open(jsonpath + "/vrtmodel.txt") as f:
        xml = f.read()

    ##### define the rows
    print("extracting rows")

    if average_point_distance > threshold:  ######## SPARSE DATA, create a rasterized buffered polyline

        # 1 convert the points to a polyline
        utils2.csv_to_polyline_shapefile(df, ycol="lat", xcol="lon", linecol="row", epsg=project,
                                         outpath=outfolder + "/rows.shp")
        # 2 buffer the polyline
        utils2.buffer(outfolder + "/rows.shp", outfolder + "/rows_buffer.shp", buffersize)

        # 3 rasterize the polyline
        path = jsonpath + "/rasterize.json"
        params = utils.json_io(path, direction="in")
        params["-a"] = "id_row"
        # params["-te"] = str(minx) + " " + str(miny) + " " + str(maxx) + " " + str(maxy)
        params["-te"] = str(minx) + " " + str(maxy) + " " + str(maxx) + " " + str(miny)
        params["-ts"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
        params["-ot"] = "Int16"
        params["-a_nodata"] = str(nodata)
        params["src_datasource"] = outfolder + "rows_buffer.shp"
        params["dst_filename"] = outfolder + "/" + id + "_rows.tiff"

        # build the gdal_rasterize request
        text = grd.build_gdal_rasterize_string(params, outtext=False)
        print(text)
        text = ["gdal_rasterize"] + text
        print(text)

        # call gdal_rasterize
        print("rasterizing the rows")
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal rasterize failed")

    else:  ############# DENSE DATA, we are under the threshold

        if force_interpolation:  # rasterize the points with nearest neighbour

            data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
                    "northing": "lat", "elevation": "row"}
            utils2.make_vrt(xml, data, outfolder + "/" + id + "_keep_row.vrt")
            '''newxml = xml.format(**data)
            f = open(folder + id + "_keep_row.vrt", "w")
            f.write(newxml)
            f.close()'''

            path = jsonpath + "/nearest.json"
            params = utils.json_io(path, direction="in")
            params["-txe"] = str(minx) + " " + str(maxx)
            params["-tye"] = str(miny) + " " + str(maxy)
            params["-outsize"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
            params["-a"]["nodata"] = str(nodata)
            params["-a"]["radius1"] = radius
            params["-a"]["radius2"] = radius
            params["src_datasource"] = outfolder + "/" + id + "_keep_row.vrt"
            params["dst_filename"] = outfolder + "/" + id + "_rows.tiff"

            # build the gdal_grid request
            text = grd.build_gdal_grid_string(params, outtext=False)
            print(text)
            text = ["gdal_grid"] + text
            print(text)

            # call gdal_grid
            print("getting the row raster")
            out, err = utils.run_tool(text)
            print("output" + out)
            if err:
                print("error:" + err)
                raise Exception("gdal grid failed")

        else:  # we calculate statistics on the vector data, no need for interpolation
            return berrycolor_workflow_dense(df, id)

    # extract the index from the rows
    d = gdal.Open(outfolder + "/" + id + "_rows.tiff")
    row_index, row_indexed, row_properties = gdalIO.raster_dataset_to_indexed_numpy(d, id, maxbands=1,
                                                                                    bandLocation="byrow",
                                                                                    nodata=nodata)
    print("saving indexed array to disk")
    fileIO.save_object(outfolder + "/" + id + "_rows_index", (row_index, row_indexed, row_properties))
    d = None

    # output the 4 virtual files for the 4 colorgrade columns
    for clr in ["a", "b", "c", "d"]:
        data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
                "northing": "lat", "elevation": clr}
        utils2.make_vrt(xml, data, outfolder + "/" + id + "_" + clr + ".vrt")

    # output the 2 virtual files for the raw fruit count and the visible fruit count
    data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
            "northing": "lat", "elevation": "raw_fruit_count"}
    utils2.make_vrt(xml, data, outfolder + "/" + id + "_rawfruit.vrt")
    data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
            "northing": "lat", "elevation": "visible_fruit_per_m"}
    utils2.make_vrt(xml, data, outfolder + "/" + id + "_visiblefruit.vrt")

    # prepare the interpolation parameters
    path = jsonpath + "/invdist.json"
    params = utils.json_io(path, direction="in")
    params["-txe"] = str(minx) + " " + str(maxx)
    params["-tye"] = str(miny) + " " + str(maxy)
    params["-outsize"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
    params["-a"]["radius1"] = radius
    params["-a"]["radius2"] = radius
    # params["-a"]["smoothing"] = "20"
    # params["-a"]["power"] = "0"
    params["-a"]["nodata"] = str(nodata)

    # first interpolate the count data
    for clr in ["raw", "visible"]:
        params["src_datasource"] = outfolder + "/" + id + "_" + clr + "fruit.vrt"
        params["dst_filename"] = outfolder + "/" + id + "_" + clr + "fruit.tiff"
        # print(params)

        # build the gdal_grid request
        text = grd.build_gdal_grid_string(params, outtext=False)
        print(text)
        text = ["gdal_grid"] + text
        print(text)

        # call gdal_grid
        print("interpolating for count " + clr)
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal grid failed")

    # upload to numpy and apply the row index
    d = gdal.Open(outfolder + "/" + id + "_rawfruit.tiff")
    band = d.GetRasterBand(1)
    new_r_indexed_raw = gdalIO.apply_index_to_single_band(band, row_index)
    d = None

    d = gdal.Open(outfolder + "/" + id + "_visiblefruit.tiff")
    band = d.GetRasterBand(1)
    new_r_indexed_visible = gdalIO.apply_index_to_single_band(band, row_index)
    d = None

    # check that all pixels have a value, otherwise assign nan to the nodata value
    # (nan pixels will not be considered for the statistics)
    if new_r_indexed_raw.min() == nodata:
        warnings.warn("indexed data for the raw berry count has nodata values; the current implementation "
                      "will count these pixels as nonzero values", RuntimeWarning)
        new_r_indexed_raw[new_r_indexed_raw == nodata] = np.nan  # careful: nan is a float, this assumes a float band
    if new_r_indexed_visible.min() == nodata:
        warnings.warn("indexed data for the visible berries per metre has nodata values; the current implementation "
                      "will count these pixels as nonzero values", RuntimeWarning)
        new_r_indexed_visible[new_r_indexed_visible == nodata] = np.nan

    stats = {}
    for clr in ["a", "b", "c", "d"]:
        params["src_datasource"] = outfolder + "/" + id + "_" + clr + ".vrt"
        params["dst_filename"] = outfolder + "/" + id + "_" + clr + ".tiff"

        # build the gdal_grid request
        text = grd.build_gdal_grid_string(params, outtext=False)
        print(text)
        text = ["gdal_grid"] + text
        print(text)

        # call gdal_grid
        print("interpolating for color " + clr)
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal grid failed")

        # upload to numpy and apply the index from the rows
        d = gdal.Open(outfolder + "/" + id + "_" + clr + ".tiff")
        band = d.GetRasterBand(1)
        new_r_indexed = gdalIO.apply_index_to_single_band(band, row_index)
        d = None

        # check that all pixels have a value, otherwise assign nan to the nodata value
        if new_r_indexed.min() == nodata:
            warnings.warn("indexed data for colorgrade " + clr + " has nodata values; the current implementation "
                          "will count these pixels as nonzero values", RuntimeWarning)
            new_r_indexed[new_r_indexed == nodata] = np.nan  # careful: nan is a float

        stats[clr] = {}
        for i in np.unique(row_indexed):  # iterate the row numbers
            area = math.pow(1 / area_multiplier, 2)  # the pixel area
            # get a mask for the current row
            mask = row_indexed == i
            # statistics for the current row:
            # average, std, total area, total nonzero area, total zero area, total raw fruit count,
            # average raw fruit count, std raw fruit count, total visible fruit per m,
            # average visible fruit per m, std visible fruit per m
            # row_indexed is 2d, while new_r_indexed and mask are 1d
            stats[clr][i] = [np.nanmean(new_r_indexed[mask[0, :]]),
                             np.nanstd(new_r_indexed[mask[0, :]]),
                             # todo the sum considers nan different from 0
                             new_r_indexed[mask[0, :]].shape[0] * area,  # could use .size?
                             np.count_nonzero(new_r_indexed[mask[0, :]]) * area,
                             new_r_indexed[mask[0, :]][new_r_indexed[mask[0, :]] == 0].shape[0] * area,
                             np.nansum(new_r_indexed_raw[mask[0, :]]),
                             np.nanmean(new_r_indexed_raw[mask[0, :]]),
                             np.nanstd(new_r_indexed_raw[mask[0, :]]),
                             np.nansum(new_r_indexed_visible[mask[0, :]]),
                             np.nanmean(new_r_indexed_visible[mask[0, :]]),
                             np.nanstd(new_r_indexed_visible[mask[0, :]])]

    return id, stats
def berrycolor_workflow_2_old(file, folder="/vagrant/code/pysdss/data/output/text/", use_gdal=True):
    """
    Testing interpolation and statistics along the line.
    (This was the old workflow 2 for sparse data, when there was also a workflow 3; now there is only a workflow 2.)
    (See colordata/colordata.py.)
    :param use_gdal: False to use scipy radial basis functions instead of gdal_grid
        (named so it does not shadow the gdal module)
    :return: a tuple (id, stats)
    """

    ############################ 1 download the filtered data with the chosen columns and create a dataframe

    # create a folder to store the output
    id = utils.create_uniqueid()
    if not os.path.exists(folder + id):
        os.mkdir(folder + id)
    folder = folder + id + "/"

    # set the path to the folder with the settings
    jsonpath = os.path.join(os.path.dirname(__file__), '..', "pysdss/experiments/interpolation/")
    jsonpath = os.path.normpath(jsonpath)

    # 1 convert the points to a polyline
    utils2.csv_to_polyline_shapefile(file, ycol="lat", xcol="lon", linecol="row", epsg=32611,
                                     outpath=folder + "rows.shp")

    ############################# 2 buffer the polyline
    utils2.buffer(folder + "rows.shp", folder + "rows_buffer.shp", 0.25)

    ############################## 3 rasterize the polyline
    # need a grid
    df = pd.read_csv(file)
    minx = math.floor(df['lon'].min())
    maxx = math.ceil(df['lon'].max())
    miny = math.floor(df['lat'].min())
    maxy = math.ceil(df['lat'].max())
    delty = maxy - miny  # height
    deltx = maxx - minx  # width
    area_multiplier = 2  # 2 to double the resolution and halve the pixel size to 0.5 metres

    path = jsonpath + "/rasterize.json"
    params = utils.json_io(path, direction="in")
    params["-a"] = "id_row"
    params["-te"] = str(minx) + " " + str(miny) + " " + str(maxx) + " " + str(maxy)
    params["-ts"] = str(deltx * 2) + " " + str(delty * 2)  # 0.5 metre pixels
    params["-ot"] = "Int16"
    params["src_datasource"] = folder + "rows_buffer.shp"
    params["dst_filename"] = folder + "rows_buffer.tiff"

    # build the gdal_rasterize request
    text = grd.build_gdal_rasterize_string(params, outtext=False)
    print(text)
    text = ["gdal_rasterize"] + text
    print(text)

    # call gdal_rasterize
    print("rasterizing the rows")
    out, err = utils.run_tool(text)
    print("output" + out)
    if err:
        print("error:" + err)
        raise Exception("gdal rasterize failed")

    ################################# 4 get the buffered polyline index
    d = gdal.Open(folder + "rows_buffer.tiff")
    row_index, row_indexed, row_properties = gdalIO.raster_dataset_to_indexed_numpy(d, id, maxbands=1,
                                                                                    bandLocation="byrow", nodata=-1)
    print("saving indexed array to disk")
    fileIO.save_object(folder + "rows_buffer_index", (row_index, row_indexed, row_properties))
    d = None

    ################################### 5 interpolate the points, use the index to extract statistics along the line
    with open(jsonpath + "/vrtmodel.txt") as f:
        xml = f.read()

    # output the 4 virtual files for the 4 colorgrade columns
    for clr in ["a", "b", "c", "d"]:
        data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
                "northing": "lat", "elevation": clr}
        utils2.make_vrt(xml, data, folder + "_" + clr + ".vrt")

    # output the 2 virtual files for the raw fruit count and the visible fruit count
    data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
            "northing": "lat", "elevation": "raw_fruit_count"}
    utils2.make_vrt(xml, data, folder + "_rawfruit.vrt")
    data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
            "northing": "lat", "elevation": "visible_fruit_per_m"}
    utils2.make_vrt(xml, data, folder + "_visiblefruit.vrt")

    # interpolate
    if use_gdal:
        path = jsonpath + "/invdist.json"
        params = utils.json_io(path, direction="in")
        params["-txe"] = str(minx) + " " + str(maxx)
        params["-tye"] = str(miny) + " " + str(maxy)
        params["-outsize"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
        params["-a"]["radius1"] = "10"
        params["-a"]["radius2"] = "10"
        params["-a"]["smoothing"] = "20"
        params["-a"]["power"] = "0"
    else:  # scipy
        # set up the interpolation grid
        tix = np.linspace(minx, maxx, deltx * 2)
        tiy = np.linspace(miny, maxy, delty * 2)
        XI, YI = np.meshgrid(tix, tiy)

    if use_gdal:
        for clr in ["raw", "visible"]:
            params["src_datasource"] = folder + "_" + clr + "fruit.vrt"
            params["dst_filename"] = folder + "_" + clr + "fruit.tiff"
            # print(params)

            # build the gdal_grid request
            text = grd.build_gdal_grid_string(params, outtext=False)
            print(text)
            text = ["gdal_grid"] + text
            print(text)

            # call gdal_grid
            print("interpolating for count " + clr)
            out, err = utils.run_tool(text)
            print("output" + out)
            if err:
                print("error:" + err)
                raise Exception("gdal grid failed")
    else:
        for clr in ["raw_fruit_count", "visible_fruit_per_m"]:
            rbf = Rbf(df['lon'].values, df['lat'].values, df[clr].values)
            ZI = rbf(XI, YI)
            # note: ZI is never written to disk, so the indexing below only works with use_gdal=True
            print()

    # upload to numpy and apply the row index
    d = gdal.Open(folder + "_rawfruit.tiff")
    band = d.GetRasterBand(1)
    new_r_indexed_raw = gdalIO.apply_index_to_single_band(band, row_index)
    d = None

    d = gdal.Open(folder + "_visiblefruit.tiff")
    band = d.GetRasterBand(1)
    new_r_indexed_visible = gdalIO.apply_index_to_single_band(band, row_index)
    d = None

    stats = {}
    for clr in ["a", "b", "c", "d"]:
        params["src_datasource"] = folder + "_" + clr + ".vrt"
        params["dst_filename"] = folder + "_" + clr + ".tiff"

        # build the gdal_grid request
        text = grd.build_gdal_grid_string(params, outtext=False)
        print(text)
        text = ["gdal_grid"] + text
        print(text)

        # call gdal_grid
        print("interpolating for color " + clr)
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal grid failed")

        # upload to numpy and apply the index from the rows
        d = gdal.Open(folder + "_" + clr + ".tiff")
        band = d.GetRasterBand(1)
        new_r_indexed = gdalIO.apply_index_to_single_band(band, row_index)
        d = None

        stats[clr] = {}
        for i in np.unique(row_indexed):  # iterate the row numbers
            area = math.pow(1 / area_multiplier, 2)  # the pixel area
            # get a mask for the current row
            mask = row_indexed == i
            # statistics for the current row:
            # average, std, total area, total nonzero area, total zero area, total raw fruit count,
            # average raw fruit count, std raw fruit count, total visible fruit per m,
            # average visible fruit per m, std visible fruit per m
            # row_indexed is 2d, while new_r_indexed and mask are 1d
            stats[clr][i] = [new_r_indexed[mask[0, :]].mean(),
                             new_r_indexed[mask[0, :]].std(),
                             new_r_indexed[mask[0, :]].shape[0] * area,
                             np.count_nonzero(new_r_indexed[mask[0, :]]) * area,
                             new_r_indexed[mask[0, :]][new_r_indexed[mask[0, :]] == 0].shape[0] * area,
                             new_r_indexed_raw[mask[0, :]].sum(),
                             new_r_indexed_raw[mask[0, :]].mean(),
                             new_r_indexed_raw[mask[0, :]].std(),
                             new_r_indexed_visible[mask[0, :]].sum(),
                             new_r_indexed_visible[mask[0, :]].mean(),
                             new_r_indexed_visible[mask[0, :]].std()]

    return id, stats
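# Hedged sketch, not part of the original workflow: the scipy branch above computes
# ZI but never persists it, so the indexing step only works with use_gdal=True.
# One way to write ZI as a GeoTIFF so the same gdalIO indexing could be reused.
# The geotransform follows the grid defined above (pixel size 1/area_multiplier,
# origin at the top-left corner); all names are from this file except
# write_array_to_tiff itself, which is illustrative.
from osgeo import gdal, osr
import numpy as np

def write_array_to_tiff(path, array, minx, maxy, pixel, epsg=32611, nodata=-1.0):
    rows, cols = array.shape
    driver = gdal.GetDriverByName("GTiff")
    ds = driver.Create(path, cols, rows, 1, gdal.GDT_Float64)
    ds.SetGeoTransform((minx, pixel, 0, maxy, 0, -pixel))  # north-up raster
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(epsg)
    ds.SetProjection(srs.ExportToWkt())
    band = ds.GetRasterBand(1)
    band.SetNoDataValue(nodata)
    band.WriteArray(np.flipud(array))  # ZI rows run south-to-north on this grid
    band.FlushCache()
    ds = None  # close and flush to disk

# e.g.: write_array_to_tiff(folder + "_rawfruit.tiff", ZI, minx, maxy, 1 / area_multiplier)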