Example #1
    def importRaster(rasterpath):
        """Convert a multiband raster to an indexed numpy structure, save it to disk,
        reload it, and return both objects for comparison."""
        d = None
        try:
            print(os.getcwd())
            d = gdal.Open(rasterpath)
            b = gdalIO.raster_dataset_to_indexed_numpy(d, os.path.basename(rasterpath), maxbands=10, bandLocation="bycolumn", nodata=0)
            print("saving object to disk")
            fileIO.save_object(os.getcwd() + "/multibandtest", b)
            print("loading object from disk")
            c = fileIO.load_object(os.getcwd() + "/multibandtest")
            return b, c
        except Exception as e:
            print(e)
        finally:
            if d is not None:
                d = None  # dereference to close the GDAL dataset
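The `finally` block above is the standard GDAL Python pattern: the dataset is closed by dropping the last reference to it. A minimal standalone sketch of that open/release idiom (the helper name and the size query are illustrative, not part of this project):

    from osgeo import gdal

    gdal.UseExceptions()  # raise Python exceptions instead of returning None on failure

    def open_raster_size(rasterpath):
        d = None
        try:
            d = gdal.Open(rasterpath)
            return d.RasterXSize, d.RasterYSize, d.RasterCount
        finally:
            d = None  # dropping the reference closes the dataset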
Example #2
    def importSingleBand(rasterpath):
        d = None
        band = None
        try:
            d = gdal.Open(rasterpath)
            band = d.GetRasterBand(1)
            print("exporting to indexed numpy array")
            a = gdalIO.single_band_to_indexed_numpy(band, nodata=0)
            print("saving object to disk")
            fileIO.save_object(os.getcwd() + "/singlebandtest", a)
            print("loading object from disk")
            c = fileIO.load_object(os.getcwd() + "/singlebandtest")
            return a, c
        except Exception as e:
            print(e)
        finally:
            # release the band before the dataset that owns it
            if band is not None:
                band = None
            if d is not None:
                d = None
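Both examples round-trip the indexed arrays through `fileIO.save_object` / `fileIO.load_object`. Assuming these are thin pickle wrappers (an assumption; the source does not show their implementation), a minimal equivalent is:

    import pickle

    # hypothetical stand-ins for fileIO.save_object / fileIO.load_object
    def save_object(path, obj):
        with open(path, "wb") as f:
            pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

    def load_object(path):
        with open(path, "rb") as f:
            return pickle.load(f)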
Example #3
    def test_dense_berry_work():

        # 1 create unique id and set the output folder
        folder = "/vagrant/code/pysdss/data/output/text/"
        id = utils.create_uniqueid()

        if not os.path.exists(folder + id):
            os.mkdir(folder + id)
        outfolder = folder + id + "/"

        file = "/vagrant/code/pysdss/data/input/2016_06_17.csv"
        file = shutil.copy(file, outfolder + "/" + id + "_keep.csv")  # copy to the directory

        # 5 set data properties: correct field names if necessary
        usecols = ["%Dataset_id", " Row", " Raw_fruit_count", " Visible_fruit_per_m", " Latitude", " Longitude",
                   "Harvestable_A", "Harvestable_B", "Harvestable_C", "Harvestable_D"]

        new_column_names = ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

        # 6 set data properties: automatically calculate the average distance between points after checking if
        # duplicate points exist (the user can also set an average distance from the web application)

        df = utils.remove_duplicates(file, [" Latitude", " Longitude"])
        df.to_csv(file, index=False)
        del df

        # 7 calculate statistics along the rows
        id, stats = berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None,
                                          grid=None, rowdirection="x",
                                          area_multiplier=2, filterzero=False, project="32611", rows_from_rawdata=True,
                                          nodata=-1, force_interpolation=False) #set force_interpolation=False to skip interpolation
        # 8 interpret statistics
        fileIO.save_object(outfolder + "/_stats", stats)
        # stats = fileIO.load_object( folder+str(id)+"/_stats")
        interpret_result(stats, outfolder)

    #test_dense_berry_work()
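The `stats` object saved above follows the layout documented in the `berrycolor_workflow` docstring (Example #5): a dictionary keyed by colorgrade, then by row number, with an 11-value list per row. A sketch of how a consumer might walk it (`print_row_summary` is a hypothetical helper, not part of the project):

    def print_row_summary(stats):
        # layout: {colorgrade: {row: [avg, std, total_area, nonzero_area, zero_area,
        #                             raw_sum, raw_avg, raw_std, vis_sum, vis_avg, vis_std]}}
        for colorgrade, rows in stats.items():
            for row, v in rows.items():
                print("grade %s row %s: mean=%.3f area=%.2f" % (colorgrade, row, v[0], v[2]))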
Example #4
    def test_berry_work2():
        # 1 create unique id and set the output folder
        folder = "/vagrant/code/pysdss/data/output/text/"
        id = utils.create_uniqueid()

        try:
            # 2 save the ID IN THE DATABASE
            dt.create_process(dt.default_connection, id, "claudio", dt.geotypes.interpolation.name)

            # 3-4 download the point data from the database, create new folder #TODO: create function to read from the database
            if not os.path.exists(folder + id):
                os.mkdir(folder + id)
            outfolder = folder + id + "/"

            file = "/vagrant/code/pysdss/data/input/2016_06_17.csv"
            file = shutil.copy(file, outfolder + "/" + id + "_keep.csv")  # copy to the directory
            ##########

            # 5 set data properties: correct field names if necessary
            usecols = ["%Dataset_id", " Row", " Raw_fruit_count", " Visible_fruit_per_m", " Latitude", " Longitude",
                       "Harvestable_A", "Harvestable_B", "Harvestable_C", "Harvestable_D"]

            #these names are mandatory for the current implementation
            new_column_names = ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

            # 6 set data properties: automatically calculate the average distance between points after checking if
            # duplicate points exist (the user can also set an average distance from the web application)

            df = utils.remove_duplicates(file, [" Latitude", " Longitude"])
            df.to_csv(file, index=False)
            del df

            # avdist = utils.average_point_distance(file, " Longitude", " Latitude", " Row", direction="x",remove_duplicates=False)

            # 7 calculate statistics along the rows
            id, stats = berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None,
                                              grid=None, rowdirection="x",
                                              area_multiplier=2, filterzero=False, project="32611", rows_from_rawdata=True,
                                              nodata=-1, force_interpolation=True)
            # 8 interpret statistics
            fileIO.save_object(outfolder + "/_stats", stats)
            # stats = fileIO.load_object( folder+str(id)+"/_stats")
            interpret_result(stats, outfolder)

            # 9 save the operation state to the database

            js = json.dumps(
                {
                    "result": [
                        {
                            "type": "csvtable",
                            "name": "clorgrade_a_table",
                            "path": outfolder + "/_a_output.csv"
                        }, {
                            "type": "csvtable",
                            "name": "clorgrade_b_table",
                            "path": outfolder + "/_b_output.csv"
                        }, {
                            "type": "csvtable",
                            "name": "clorgrade_c_table",
                            "path": outfolder + "/_c_output.csv"
                        }, {
                            "type": "csvtable",
                            "name": "clorgrade_d_table",
                            "path": outfolder + "/_d_output.csv"
                        }
                    ],
                    "error": {}
                }
            )

            dt.update_process(dt.default_connection, id, "claudio", dt.geomessages.completed.name, js)

            #### use geostatistics to get the rasterized image (indicator kriging?)

        except Exception as e:

            js = json.dumps(
                {
                    "result": [],
                    "error": {"message": str(e)}
                }
            )
            dt.update_process(dt.default_connection, id, "claudio", dt.geomessages.error.name, js)
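The JSON payload stored with `dt.update_process` has a fixed shape: a `result` list of typed outputs and an `error` object that is empty on success. Assuming a consumer reads back exactly the string built above, decoding it is straightforward:

    import json

    def read_process_state(js):
        # js is the JSON string built in the workflow above
        state = json.loads(js)
        if state["error"]:
            print("process failed:", state["error"]["message"])
        else:
            for item in state["result"]:
                print(item["type"], item["name"], "->", item["path"])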
Example #5
def berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None, grid=None,
                        rowdirection="x", area_multiplier=2, filterzero=False, project="32611", buffersize=0.25,
                        nodata=-1, force_interpolation=False):
    """
    This is the workflow for direct interpolation of high and low density colorgrade data
    For high density data statistics are calculated directly from the vector data
    
    The function returns a dictionary with statistics in the form
    {"colorgrade": {row: [0.031, 0.029, 93.0, 83.75, 9.25, 118339.1, 318.11, 29.281, 213405.12, 573.66968, 61.072674],
        the list values are average, std, totalarea,total nonzero area, total zeroarea, total raw fruit count, average raw fruit count, 
        std raw fruit count ,total visible fruitxm, average visible fruitXm, std visible fruitXm
       
    :param id: the unique id for this operation
    :param file: the csv file with the data
    :param usecols: a list with the original field names 
    :param new_column_names: a list with the new field names

        in the current implementation names can only be
        ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

    :param outfolder: the folder for the output 
    :param average_point_distance: this number is use to decide how to rasterize the rows and the search radius when
                                    for the inverse distance is applied
    :param grid: the interpolation grid, if None the grid will be calculated on the data boundaries

        grid format is {'minx': ,'maxx': ,'miny': ,'maxy': ,'delty': , 'deltx': }
    :param rowdirection: the direction of the vineyard rows, default "x", pass any other string otherwise
    :param area_multiplier: used to increase the interpolation grid resolution, use 2 to halve the pixel size
    :param filterzero: True to filter out zeros from the data
    :param project: epsg string for reprojection
    :param buffersize: the buffersize for buffered polyline
    :param nodata: the nodata value to assign to nodata pixels
    :param force_interpolation: True if interpolation should be carried out also for average_point_distance under the threshold
    :return: a dictionary with the statistics in the form

        {"colorgrade": {row: [0.031, 0.029, 93.0, 83.75, 9.25, 118339.1, 318.11, 29.281, 213405.12, 573.66968, 61.072674],
        the list values are average, std, totalarea,total nonzero area, total zeroarea, total raw fruit count, average raw fruit count, 
        std raw fruit count ,total visible fruitxm, average visible fruitXm, std visible fruitXm
    """

    #this is the hardcoded threshold under which data is considered dense
    threshold = 0.5

    #######checking inputs
    if new_column_names != ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]:
        raise ValueError('With the current implementation column names must be '
                         '["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]')
    if filterzero:
        raise NotImplementedError("filtering out zero is not implemented")
    #########

    # set the path to folder with interpolation settings
    jsonpath = os.path.join(os.path.dirname(__file__), '../../..', "pysdss/experiments/interpolation/")
    jsonpath = os.path.normpath(jsonpath)

    # open the file and fix the column names
    df = pd.read_csv(file, usecols=usecols)
    df.columns = new_column_names

    if project:
        print("reprojecting data")

        west, north = utils2.reproject_coordinates(df, "epsg:" + str(project), "lon", "lat", False)
        filter.set_field_value(df, west, "lon")
        filter.set_field_value(df, north, "lat")

        # overwrite the input file
        df.to_csv(file, index=False)  # todo check this is ok when there is no filtering

    if not average_point_distance:
        print("calculating average distance")
        average_point_distance = utils.average_point_distance(file, "lon", "lat", "row", direction=rowdirection,
                                                              rem_duplicates=True, operation="mean")

    #set the interpolation radius based on threshold and average_point_distance
    radius = "0.8" if average_point_distance <= threshold else str(average_point_distance * 2)

    print("defining interpolation grid")

    # define the interpolation grid
    if grid is None:
        # need a grid
        minx = math.floor(df['lon'].min())
        maxx = math.ceil(df['lon'].max())
        miny = math.floor(df['lat'].min())
        maxy = math.ceil(df['lat'].max())
        delty = maxy - miny  # height
        deltx = maxx - minx  # width
    else:  # the user passed a grid object
        minx = grid['minx']
        maxx = grid['maxx']
        miny = grid['miny']
        maxy = grid['maxy']
        delty = grid['delty']  # height
        deltx = grid['deltx']  # width

    '''
    if filterzero:  # todo find the best way of filtering multiple columns
        keep, discard = filter.filter_byvalue(df, 0, ">", colname="d")
    else:
        keep = df
        discard = None
    if discard: discard.to_csv(folder + id + "_discard.csv", index=False)
    '''

    # open the model for creating gdal virtual files
    with open(jsonpath + "/vrtmodel.txt") as f:
        xml = f.read()

    ##### define rows

    print("extracting rows")

    if average_point_distance > threshold:  ########SPARSE DATA, create rasterized buffered polyline

        # 1 convert point to polyline
        utils2.csv_to_polyline_shapefile(df, ycol="lat", xcol="lon", linecol="row", epsg=project,
                                         outpath=outfolder + "/rows.shp")
        #  2 buffer the polyline
        utils2.buffer(outfolder + "/rows.shp", outfolder + "/rows_buffer.shp", buffersize)
        #  3 rasterize the polyline

        path = jsonpath + "/rasterize.json"
        params = utils.json_io(path, direction="in")

        params["-a"] = "id_row"
        # params["-te"]= str(minx) + " " + str(miny) + " " + str(maxx) + " " + str(maxy)
        params["-te"] = str(minx) + " " + str(maxy) + " " + str(maxx) + " " + str(miny)
        params["-ts"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
        params["-ot"] = "Int16"
        params["-a_nodata"] = str(nodata)
        params["src_datasource"] = outfolder + "rows_buffer.shp"
        params["dst_filename"] = outfolder + "/" + id + "_rows.tiff"

        # build gdal_grid request
        text = grd.build_gdal_rasterize_string(params, outtext=False)
        print(text)
        text = ["gdal_rasterize"] + text
        print(text)

        # call gdal_rasterize
        print("rasterizing the rows")
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal rasterize failed")


    else:  #############DENSE DATA   we are under the threshold, rasterize points with nearest neighbour

        if force_interpolation:
            data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
                    "northing": "lat", "elevation": "row"}
            utils2.make_vrt(xml, data, outfolder + "/" + id + "_keep_row.vrt")
            '''newxml = xml.format(**data)
            f = open(folder + id+"_keep_row.vrt", "w")
            f.write(newxml)
            f.close()'''

            path = jsonpath + "/nearest.json"
            params = utils.json_io(path, direction="in")

            params["-txe"] = str(minx) + " " + str(maxx)
            params["-tye"] = str(miny) + " " + str(maxy)
            params["-outsize"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
            params["-a"]["nodata"] = str(nodata)
            params["-a"]["radius1"] = radius
            params["-a"]["radius2"] = radius
            params["src_datasource"] = outfolder + "/" + id + "_keep_row.vrt"
            params["dst_filename"] = outfolder + "/" + id + "_rows.tiff"

            # build gdal_grid request
            text = grd.build_gdal_grid_string(params, outtext=False)
            print(text)
            text = ["gdal_grid"] + text
            print(text)

            # call gdal_grid
            print("Getting the row raster")
            out, err = utils.run_tool(text)
            print("output" + out)
            if err:
                print("error:" + err)
                raise Exception("gdal grid failed")

        else: # we calculate statistics on the vector data, no need for interpolation

            return berrycolor_workflow_dense(df, id)

    # extract index from the rows
    d = gdal.Open(outfolder + "/" + id + "_rows.tiff")
    row_index, row_indexed, row_properties = gdalIO.raster_dataset_to_indexed_numpy(d, id, maxbands=1,
                                                                                    bandLocation="byrow",
                                                                                    nodata=nodata)
    print("saving indexed array to disk")
    fileIO.save_object(outfolder + "/" + id + "_rows_index", (row_index, row_indexed, row_properties))
    d = None

    # output the 4 virtual files for the 4 columns
    for clr in ["a", "b", "c", "d"]:
        data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon",
                "northing": "lat", "elevation": clr}
        utils2.make_vrt(xml, data, outfolder + "/" + id + "_" + clr + ".vrt")
    # output the 2 virtual files for raw fruit count and visible fruit count
    data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon", "northing": "lat",
            "elevation": "raw_fruit_count"}
    utils2.make_vrt(xml, data, outfolder + "/" + id + "_rawfruit.vrt")
    data = {"layername": os.path.basename(file).split(".")[0], "fullname": file, "easting": "lon", "northing": "lat",
            "elevation": "visible_fruit_per_m"}
    utils2.make_vrt(xml, data, outfolder + "/" + id + "_visiblefruit.vrt")

    # prepare interpolation parameters
    path = jsonpath + "/invdist.json"
    params = utils.json_io(path, direction="in")
    params["-txe"] = str(minx) + " " + str(maxx)
    params["-tye"] = str(miny) + " " + str(maxy)
    params["-outsize"] = str(deltx * area_multiplier) + " " + str(delty * area_multiplier)
    params["-a"]["radius1"] = radius
    params["-a"]["radius2"] = radius
    # params["-a"]["smoothing"] = "20"
    # params["-a"]["power"] = "0"
    params["-a"]["nodata"] = str(nodata)

    # first interpolate the count data
    for clr in ["raw", "visible"]:

        params["src_datasource"] = outfolder + "/" + id + "_" + clr + "fruit.vrt"
        params["dst_filename"] = outfolder + "/" + id + "_" + clr + "fruit.tiff"

        # print(params)
        # build gdal_grid request
        text = grd.build_gdal_grid_string(params, outtext=False)
        print(text)
        text = ["gdal_grid"] + text
        print(text)

        # call gdal_grid
        print("Interpolating for count " + clr)
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal grid failed")

    # upload to numpy
    d = gdal.Open(outfolder + "/" + id + "_rawfruit.tiff")
    band = d.GetRasterBand(1)
    # apply index
    new_r_indexed_raw = gdalIO.apply_index_to_single_band(band, row_index)
    d = None

    d = gdal.Open(outfolder + "/" + id + "_visiblefruit.tiff")
    band = d.GetRasterBand(1)
    # apply index
    new_r_indexed_visible = gdalIO.apply_index_to_single_band(band, row_index)
    d = None

    # check if all pixels have a value, otherwise assign nan to the nodata values (which will not be considered for statistics)
    if new_r_indexed_raw.min() == nodata:
        warnings.warn(
            "indexed data for the raw fruit count has nodata values, the current implementation will"
            " count these pixels as nonzero values", RuntimeWarning)
        new_r_indexed_raw[new_r_indexed_raw == nodata] = np.nan  # careful, nan is float
    if new_r_indexed_visible.min() == nodata:
        warnings.warn(
            "indexed data for the visible fruit per meter has nodata values, the current implementation will"
            " count these pixels as nonzero values", RuntimeWarning)
        new_r_indexed_visible[new_r_indexed_visible == nodata] = np.nan

    stats = {}

    for clr in ["a", "b", "c", "d"]:

        params["src_datasource"] = outfolder + "/" + id + "_" + clr + ".vrt"
        params["dst_filename"] = outfolder + "/" + id + "_" + clr + ".tiff"

        # build gdal_grid request
        text = grd.build_gdal_grid_string(params, outtext=False)
        print(text)
        text = ["gdal_grid"] + text
        print(text)

        # call gdal_grid
        print("interpolating for color " + clr)
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal grid failed")

        # upload to numpy
        d = gdal.Open(outfolder + "/" + id + "_" + clr + ".tiff")
        band = d.GetRasterBand(1)
        # apply index
        new_r_indexed = gdalIO.apply_index_to_single_band(band, row_index)  # this is the index from the rows
        d = None

        # check if all pixels have a value, otherwise assign nan to the nodata values
        if new_r_indexed.min() == nodata:
            warnings.warn("indexed data for colorgrade " + clr + " has nodata values, the current implementation will"
                                                                 " count these pixels as nonzero values", RuntimeWarning)
            new_r_indexed[new_r_indexed == nodata] = np.nan  # careful, nan is float

        stats[clr] = {}

        for i in np.unique(row_indexed):  # get the row numbers

            area = math.pow(1 / area_multiplier, 2)  # the pixel area

            # get a mask for the current row
            mask = row_indexed == i
            # statistics for current row

            # average, std, totalarea,total nonzero area, total zeroarea, total raw fruit count,
            # average raw fruit count, std raw fruit count ,total visible fruitxm, average visible fruitXm, std visible fruitXm

            # r_indexed is 2d , while new_r_indexed and mask are 1d

            stats[clr][i] = [np.nanmean(new_r_indexed[mask[0, :]]), np.nanstd(new_r_indexed[mask[0, :]]),

                             # todo the sum considers nan different from 0
                             new_r_indexed[mask[0, :]].shape[0] * area,  # could use .size?
                             np.count_nonzero(new_r_indexed[mask[0, :]]) * area,
                             new_r_indexed[mask[0, :]][new_r_indexed[mask[0, :]] == 0].shape[0] * area,

                             np.nansum(new_r_indexed_raw[mask[0, :]]),
                             np.nanmean(new_r_indexed_raw[mask[0, :]]),
                             np.nanstd(new_r_indexed_raw[mask[0, :]]),
                             np.nansum(new_r_indexed_visible[mask[0, :]]),
                             np.nanmean(new_r_indexed_visible[mask[0, :]]),
                             np.nanstd(new_r_indexed_visible[mask[0, :]])]

    return id, stats
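A note on the grid arithmetic used throughout the workflow: the extent is snapped to whole units with floor/ceil, the output raster is `deltx * area_multiplier` by `delty * area_multiplier` pixels, so each pixel covers `(1 / area_multiplier)**2` square units, which is the `area` factor in the statistics loop. A standalone check of that relationship (the helper is illustrative, not part of the project):

    import math

    def grid_shape_and_pixel_area(minx, maxx, miny, maxy, area_multiplier=2):
        # mirrors the grid setup in berrycolor_workflow; area_multiplier=2 gives 0.5-unit pixels
        minx, maxx = math.floor(minx), math.ceil(maxx)
        miny, maxy = math.floor(miny), math.ceil(maxy)
        deltx, delty = maxx - minx, maxy - miny
        size = (deltx * area_multiplier, delty * area_multiplier)
        pixel_area = math.pow(1 / area_multiplier, 2)
        return size, pixel_area

    # e.g. a 100 x 60 unit extent with area_multiplier=2 -> (200, 120) pixels of 0.25 units^2 each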
Example #6
    def berrycolor_workflow_2_old(
            file, folder="/vagrant/code/pysdss/data/output/text/", use_gdal=True):
        """
        testing interpolation and statistics along the line (this was the old workflow_2 for sparse data, when there
        was also a workflow_3; now there is only a workflow_2) (see colordata/colordata.py)

        use use_gdal=False for scipy radial basis
        :return:
        """

        ############################ 1 download the filtered data and create a dataframe
        # create a folder to store the output
        id = utils.create_uniqueid()
        if not os.path.exists(folder + id):
            os.mkdir(folder + id)
        folder = folder + id + "/"

        #set the path to folder with settings
        jsonpath = os.path.join(os.path.dirname(__file__), '..',
                                "pysdss/experiments/interpolation/")
        jsonpath = os.path.normpath(jsonpath)

        #1 convert point to polyline

        utils2.csv_to_polyline_shapefile(file,
                                         ycol="lat",
                                         xcol="lon",
                                         linecol="row",
                                         epsg=32611,
                                         outpath=folder + "rows.shp")

        #############################  2 buffer the polyline

        utils2.buffer(folder + "rows.shp", folder + "rows_buffer.shp", 0.25)

        ##############################  3 rasterize the polyline

        #need a grid
        df = pd.read_csv(file)
        minx = math.floor(df['lon'].min())
        maxx = math.ceil(df['lon'].max())
        miny = math.floor(df['lat'].min())
        maxy = math.ceil(df['lat'].max())
        delty = maxy - miny  #height
        deltx = maxx - minx  #width

        area_multiplier = 2  #2 to double the resolution and halve the pixel size to 0.5 meters

        path = jsonpath + "/rasterize.json"
        params = utils.json_io(path, direction="in")

        params["-a"] = "id_row"
        params["-te"] = str(minx) + " " + str(miny) + " " + str(
            maxx) + " " + str(maxy)
        params["-ts"] = str(deltx * 2) + " " + str(
            delty * 2)  #pixel 0,5 meters
        params["-ot"] = "Int16"

        params["src_datasource"] = folder + "rows_buffer.shp"
        params["dst_filename"] = folder + "rows_buffer.tiff"

        # build gdal_grid request
        text = grd.build_gdal_rasterize_string(params, outtext=False)
        print(text)
        text = ["gdal_rasterize"] + text
        print(text)

        # call gdal_rasterize
        print("rasterizing the rows")
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal rasterize failed")

        #################################   4 get the buffered polyline index

        d = gdal.Open(folder + "rows_buffer.tiff")
        row_index, row_indexed, row_properties = gdalIO.raster_dataset_to_indexed_numpy(
            d, id, maxbands=1, bandLocation="byrow", nodata=-1)

        print("saving indexed array to disk")
        fileIO.save_object(folder + "rows_buffer_index",
                           (row_index, row_indexed, row_properties))
        d = None

        ###################################   5 interpolate points, use the index to extract statistics along the line

        with open(jsonpath + "/vrtmodel.txt") as f:
            xml = f.read()

        # output the 4 virtual files for the 4 columns
        for clr in ["a", "b", "c", "d"]:
            data = {
                "layername": os.path.basename(file).split(".")[0],
                "fullname": file,
                "easting": "lon",
                "northing": "lat",
                "elevation": clr
            }
            utils2.make_vrt(xml, data, folder + "_" + clr + ".vrt")
        # output the 2 virtual files for raw fruit count and visible fruit count
        data = {
            "layername": os.path.basename(file).split(".")[0],
            "fullname": file,
            "easting": "lon",
            "northing": "lat",
            "elevation": "raw_fruit_count"
        }
        utils2.make_vrt(xml, data, folder + "_rawfruit.vrt")
        data = {
            "layername": os.path.basename(file).split(".")[0],
            "fullname": file,
            "easting": "lon",
            "northing": "lat",
            "elevation": "visible_fruit_per_m"
        }
        utils2.make_vrt(xml, data, folder + "_visiblefruit.vrt")

        # interpolate

        if use_gdal:
            path = jsonpath + "/invdist.json"
            params = utils.json_io(path, direction="in")
            params["-txe"] = str(minx) + " " + str(maxx)
            params["-tye"] = str(miny) + " " + str(maxy)
            params["-outsize"] = str(deltx * area_multiplier) + " " + str(
                delty * area_multiplier)
            params["-a"]["radius1"] = "10"
            params["-a"]["radius2"] = "10"
            params["-a"]["smoothing"] = "20"
            params["-a"]["power"] = "0"

        else:  #scipy
            #set up the interpolation grid
            tix = np.linspace(minx, maxx, deltx * area_multiplier)
            tiy = np.linspace(miny, maxy, delty * area_multiplier)
            XI, YI = np.meshgrid(tix, tiy)

        if use_gdal:
            for clr in ["raw", "visible"]:

                params["src_datasource"] = folder + "_" + clr + "fruit.vrt"
                params["dst_filename"] = folder + "_" + clr + "fruit.tiff"

                #print(params)
                # build gdal_grid request
                text = grd.build_gdal_grid_string(params, outtext=False)
                print(text)
                text = ["gdal_grid"] + text
                print(text)

                # call gdal_grid
                print("Interpolating for count " + clr)
                out, err = utils.run_tool(text)
                print("output" + out)
                if err:
                    print("error:" + err)
                    raise Exception("gdal grid failed")
        else:
            for clr in ["raw_fruit_count", "visible_fruit_per_m"]:
                rbf = Rbf(df['lon'].values, df['lat'].values, df[clr].values)
                # note: ZI is computed but never saved; the indexing steps below still read the gdal tiffs
                ZI = rbf(XI, YI)

        # upload to numpy
        d = gdal.Open(folder + "_rawfruit.tiff")
        band = d.GetRasterBand(1)
        # apply index
        new_r_indexed_raw = gdalIO.apply_index_to_single_band(band, row_index)
        d = None

        d = gdal.Open(folder + "_visiblefruit.tiff")
        band = d.GetRasterBand(1)
        # apply index
        new_r_indexed_visible = gdalIO.apply_index_to_single_band(
            band, row_index)
        d = None

        stats = {}

        for clr in ["a", "b", "c", "d"]:
            params["src_datasource"] = folder + "_" + clr + ".vrt"
            params["dst_filename"] = folder + "_" + clr + ".tiff"

            # build gdal_grid request
            text = grd.build_gdal_grid_string(params, outtext=False)
            print(text)
            text = ["gdal_grid"] + text
            print(text)

            # call gdal_grid
            print("interpolating for color " + clr)
            out, err = utils.run_tool(text)
            print("output" + out)
            if err:
                print("error:" + err)
                raise Exception("gdal grid failed")

            # upload to numpy
            d = gdal.Open(folder + "_" + clr + ".tiff")
            band = d.GetRasterBand(1)
            # apply index
            new_r_indexed = gdalIO.apply_index_to_single_band(
                band, row_index)  # this is the index from the rows
            d = None

            stats[clr] = {}

            for i in np.unique(row_indexed):  # get the row numbers

                area = math.pow(1 / area_multiplier, 2)

                # get a mask for the current row
                mask = row_indexed == i
                # statistics for current row

                # average, std, totalarea,total nonzero area, total zeroarea, total raw fruit count,
                # average raw fruit count, std raw fruit count ,total visible fruitxm, average visible fruitXm, std visible fruitXm

                # r_indexed is 2d , while new_r_indexed and mask are 1d

                stats[clr][i] = [
                    new_r_indexed[mask[0, :]].mean(),
                    new_r_indexed[mask[0, :]].std(),
                    new_r_indexed[mask[0, :]].shape[0] * area,
                    np.count_nonzero(new_r_indexed[mask[0, :]]) * area,
                    new_r_indexed[mask[0, :]][new_r_indexed[mask[0, :]] ==
                                              0].shape[0] * area,
                    new_r_indexed_raw[mask[0, :]].sum(),
                    new_r_indexed_raw[mask[0, :]].mean(),
                    new_r_indexed_raw[mask[0, :]].std(),
                    new_r_indexed_visible[mask[0, :]].sum(),
                    new_r_indexed_visible[mask[0, :]].mean(),
                    new_r_indexed_visible[mask[0, :]].std()
                ]

        return id, stats
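In the scipy branch above, `ZI` is computed and then dropped. A self-contained sketch of the same `scipy.interpolate.Rbf` usage that keeps the result (the sample points are made up for illustration):

    import numpy as np
    from scipy.interpolate import Rbf

    # made-up sample points standing in for the lon/lat/value columns
    x = np.array([0.0, 1.0, 2.0, 3.0])
    y = np.array([0.0, 1.0, 0.5, 2.0])
    z = np.array([1.0, 2.0, 0.5, 3.0])

    rbf = Rbf(x, y, z)  # default multiquadric radial basis
    tix = np.linspace(x.min(), x.max(), 50)
    tiy = np.linspace(y.min(), y.max(), 50)
    XI, YI = np.meshgrid(tix, tiy)
    ZI = rbf(XI, YI)  # interpolated surface on the 50x50 grid
    print(ZI.shape)   # (50, 50)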