Example #1
0
def col_list_val_to_row(pndDf, colWithLists, geomCol=None, epsg=None):
    """
    Expand a list-valued column into one row per list element.

    Convert a dataframe:
    
       | col_a | col_b | col_c
     0 |   X   |   X   |   1
     1 |   X   |   X   | [2,3]
     
    To:
       | col_a | col_b | col_c
     0 |   X   |   X   |   1
     1 |   X   |   X   |   2
     2 |   X   |   X   |   3

    Rows whose value in ``colWithLists`` is not a list are copied as they
    are; a row holding an empty list produces no output row. The result is
    rebuilt from plain dicts, so it gets a fresh 0..n-1 index.

    Parameters:
    * pndDf - pandas DataFrame to expand;
    * colWithLists - name of the column that may hold lists;
    * geomCol, epsg - when both are truthy, the expanded table is handed to
    glass' df_to_geodf together with these two values and its result is
    returned instead of the plain DataFrame.

    An empty input yields an empty DataFrame with the same columns.
    """
    import pandas

    # Rows are collected with an explicit loop instead of DataFrame.apply:
    # apply is meant for computing results, and on an empty frame it may
    # call the function with a placeholder row, which would add a bogus
    # record here.
    new_rows = []
    for _, row in pndDf.iterrows():
        value = row[colWithLists]

        if not isinstance(value, list):
            new_rows.append(row.to_dict())
            continue

        for item in value:
            new_row = row.to_dict()
            new_row[colWithLists] = item
            new_rows.append(new_row)

    # Convert again to DataFrame
    if new_rows:
        result = pandas.DataFrame(new_rows)
    else:
        # Keep the schema even when there is nothing to expand
        result = pandas.DataFrame(columns=pndDf.columns)

    if geomCol and epsg:
        from glass.g.it.pd import df_to_geodf

        return df_to_geodf(result, geomCol, epsg)

    return result
Example #2
0
def lst_prod_by_cell_and_year(shp,
                              id_col,
                              year,
                              outshp,
                              platform="Sentinel-2",
                              processingl='Level-2A',
                              epsg=32629):
    """
    Get a list of images:
    * one for each grid in shp;
    * one for each month in one year - the chosen image will be the one
    with lesser area occupied by clouds;
    total_images = grid_number * number_months_year

    Parameters:
    * shp - grid file read with shp_to_obj (re-projected to EPSG:4326);
    * id_col - grid column compared with the cell ID parsed from each
    product title;
    * year - year to search (anything int() accepts);
    * outshp - path of the file written with the selected products;
    * platform, processingl - forwarded to lst_prod as platname/procLevel;
    * epsg - projected SRS used to measure the product footprints.

    Returns outshp.

    Relies on lst_prod, which is expected to be defined elsewhere in this
    module.
    """

    import calendar

    from glass.g.rd.shp import shp_to_obj
    from glass.ng.pd import merge_df
    from glass.g.wt.shp import df_to_shp
    from glass.g.it.pd import df_to_geodf

    year_int = int(year)

    # Open SHP
    grid = shp_to_obj(shp, srs_to=4326)

    def get_grid_id(row):
        # The cell ID is the 6th '_'-separated token of the title,
        # without its first character
        row['cellid'] = row.title.split('_')[5][1:]

        return row

    # Search for images
    dfs = []
    for _, cell in grid.iterrows():
        for month in range(1, 13):
            k = "{:02d}".format(month)

            # monthrange gives the real last day, so leap years are covered
            last_day = calendar.monthrange(year_int, month)[1]

            start = "{}{}01".format(year_int, k)
            end = "{}{}{}".format(year_int, k, last_day)

            # January and February 2018 are requested as 'Level-2Ap'
            # when 'Level-2A' is asked for
            if year_int == 2018 and processingl == 'Level-2A' and month <= 2:
                plevel = 'Level-2Ap'
            else:
                plevel = processingl

            # The search runs once per cell AND per month
            prod = lst_prod(cell.geometry.wkt,
                            start,
                            end,
                            platname=platform,
                            procLevel=plevel)

            if not prod.shape[0]:
                continue

            # Get area
            # (square kilometres, assuming the units of epsg are metres)
            prod = prod.to_crs('EPSG:{}'.format(str(epsg)))
            prod['areav'] = prod.geometry.area / 1000000

            # We want only images with more than 70% of data
            # NOTE(review): 7000 presumably stands for 70% of a
            # 100 x 100 km tile - confirm
            prod = prod[prod.areav >= 7000]

            # Nothing left: skip, otherwise the 'cellid' column would
            # never be created and the filter below would fail
            if not prod.shape[0]:
                continue

            # ID Cell ID
            prod = prod.apply(lambda x: get_grid_id(x), axis=1)
            # Filter Cell ID
            prod = prod[prod.cellid == cell[id_col]]

            # Sort by cloud cover and date
            prod = prod.sort_values(['cloudcoverpercentage', 'ingestiondate'],
                                    ascending=[True, True])

            # Get only the image with less cloud cover
            prod = prod.head(1)

            dfs.append(prod)

    fdf = merge_df(dfs)
    fdf = df_to_geodf(fdf, 'geometry', epsg)

    df_to_shp(fdf, outshp)

    return outshp
Example #3
0
def join_attr_by_distance(mainTable, joinTable, workGrass, epsg_code, output):
    """
    Attach to every feature of mainTable the attributes of its nearest
    feature in joinTable.

    The nearest-feature search is delegated to GRASS GIS: both inputs are
    imported into a 'join_loc' location created under workGrass, the near
    tool writes the nearest category ('tocat') and distance ('todistance')
    into the main layer, and both layers are exported again and merged
    with pandas. The merged table is written to output, which is returned.
    """

    import os
    from glass.g.wenv.grs import run_grass
    from glass.g.rd.shp import shp_to_obj
    from glass.g.it.pd import df_to_geodf
    from glass.g.wt.shp import df_to_shp
    from glass.pys.oss import fprop

    # GRASS GIS location + session (must exist before the GRASS-based
    # glass tools are imported)
    grs_base = run_grass(workGrass, location='join_loc', srs=epsg_code)

    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(grs_base, workGrass, 'join_loc', 'PERMANENT')

    from glass.g.gp.prox import grs_near as near
    from glass.g.it.shp import shp_to_grs, grs_to_shp

    def _export(layer):
        # Write a GRASS layer as '<layer>_grs.shp' inside the location folder
        target = os.path.join(workGrass, 'join_loc', layer + '_grs.shp')

        return grs_to_shp(layer, target, None, asMultiPart=True)

    # Inputs into GRASS GIS
    main_lyr = shp_to_grs(mainTable, fprop(mainTable, 'fn', forceLower=True))
    join_lyr = shp_to_grs(joinTable, fprop(joinTable, 'fn', forceLower=True))

    # Nearest join feature for every main feature
    near(main_lyr, join_lyr, nearCatCol="tocat", nearDistCol="todistance")

    # Back to files, then to DataFrames
    main_shp = _export(main_lyr)
    join_shp = _export(join_lyr)

    main_df = shp_to_obj(main_shp)
    join_df = shp_to_obj(join_shp)

    # Relate each main feature with its nearest join feature
    merged = main_df.merge(join_df,
                           how='inner',
                           left_on='tocat',
                           right_on='cat')

    # Keep only the main geometry/category
    merged = merged.drop(["geometry_y", "cat_y"], axis=1)
    merged = merged.rename(columns={"cat_x": "cat_grass"})

    # Both category columns are shifted down by one
    for cat_col in ("tocat", "cat_grass"):
        merged[cat_col] = merged[cat_col] - 1

    merged = df_to_geodf(merged, "geometry_x", epsg_code)

    df_to_shp(merged, output)

    return output
Example #4
0
def joinLines_by_spatial_rel_raster(mainLines, mainId, joinLines, joinCol,
                                    outfile, epsg):
    """
    Join Attributes based on a spatial overlap.
    A raster based approach

    Both line layers are rasterized inside a temporary GRASS GIS location
    (mainLines by mainId, joinLines by joinCol) and combined; for every
    main value the join value sharing the largest number of cells is kept
    (the smallest join value on ties) and written to column joinCol of
    the main lines.

    Parameters:
    * mainLines - lines file receiving the attribute;
    * mainId - column of mainLines used as raster value;
    * joinLines - lines file providing the attribute;
    * joinCol - column of joinLines used as raster value; also the name of
    the resulting column;
    * outfile - path of the file to write;
    * epsg - EPSG code of the data.

    Intermediate files go to a 'tmp_dt' folder next to mainLines.
    Returns outfile.
    """

    import os
    import pandas
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import df_to_shp
    from glass.g.gp.ext import shpext_to_boundshp
    from glass.g.dp.torst import shp_to_rst
    from glass.g.it.pd import df_to_geodf
    from glass.g.wenv.grs import run_grass
    from glass.ng.pd.joins import join_dfs
    from glass.ng.pd.agg import df_groupBy
    from glass.pys.oss import fprop, mkdir

    # 'tmp_dt' folder beside the main lines file.
    # dirname takes a single path; the folder name belongs to join
    workspace = mkdir(os.path.join(os.path.dirname(mainLines), 'tmp_dt'))

    # Create boundary file
    boundary = shpext_to_boundshp(mainLines,
                                  os.path.join(workspace, "bound.shp"), epsg)

    # Reference raster (cell size 5, nodata -99) used to set up GRASS
    boundRst = shp_to_rst(boundary,
                          None,
                          5,
                          -99,
                          os.path.join(workspace, "rst_base.tif"),
                          epsg=epsg,
                          api='gdal')

    # Start GRASS GIS Session
    gbase = run_grass(workspace, location="grs_loc", srs=boundRst)

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(gbase, workspace, "grs_loc", "PERMANENT")

    from glass.g.rst.local import combine
    from glass.g.prop.rst import get_rst_report_data
    from glass.g.it.shp import shp_to_grs, grs_to_shp
    # Imported under its own name so it does not shadow the
    # file-based shp_to_rst used above
    from glass.g.dp.torst import grsshp_to_grsrst

    # Add data to GRASS GIS
    mainVector = shp_to_grs(mainLines, fprop(mainLines, 'fn', forceLower=True))
    joinVector = shp_to_grs(joinLines, fprop(joinLines, 'fn', forceLower=True))

    mainRst = grsshp_to_grsrst(mainVector, mainId, f"rst_{mainVector}")
    joinRst = grsshp_to_grsrst(joinVector, joinCol, f"rst_{joinVector}")

    combRst = combine(mainRst, joinRst, "combine_rst", api="pygrass")

    combine_data = get_rst_report_data(combRst, UNITS="c")

    combDf = pandas.DataFrame(combine_data,
                              columns=["comb_cat", "rst_1", "rst_2", "ncells"])

    # Discard combinations where the join raster is '0'
    # (presumably cells without a join line - confirm).
    # copy() so the assignment below acts on an independent frame
    combDf = combDf[combDf["rst_2"] != '0'].copy()
    combDf["ncells"] = combDf["ncells"].astype(int)

    # Largest cell count for each main value...
    gbdata = df_groupBy(combDf, ["rst_1"], "MAX", "ncells")

    # ... and the combination(s) reaching that count
    fTable = join_dfs(gbdata, combDf, ["rst_1", "ncells"], ["rst_1", "ncells"])

    # Ties are solved by keeping the smallest join value
    fTable["rst_2"] = fTable["rst_2"].astype(int)
    fTable = df_groupBy(fTable, ["rst_1", "ncells"],
                        STAT='MIN',
                        STAT_FIELD="rst_2")

    mainLinesCat = grs_to_shp(mainVector,
                              os.path.join(workspace, mainVector + '.shp'),
                              'line')

    mainLinesDf = shp_to_obj(mainLinesCat)

    # NOTE(review): the join key is 'cat' while the raster values come
    # from mainId - confirm both identify the same features
    resultDf = join_dfs(mainLinesDf,
                        fTable,
                        "cat",
                        "rst_1",
                        onlyCombinations=None)

    resultDf.rename(columns={"rst_2": joinCol}, inplace=True)

    resultDf = df_to_geodf(resultDf, "geometry", epsg)

    df_to_shp(resultDf, outfile)

    return outfile
Example #5
0
def closest_facility(incidents,
                     incidents_id,
                     facilities,
                     output,
                     impedance='TravelTime'):
    """
    Find, for each incident, the route to its closest facility using the
    ESRI REST closest facility service (CF_URL), and write the routes
    with the incidents attributes to output.

    Parameters:
    * incidents - point file with the incidents;
    * incidents_id - column of incidents identifying each incident;
    * facilities - point file with the facilities;
    * output - path of the file to write.

    impedance options:
    * TravelTime;
    * WalkTime;
    * metric (result measured with Total_Kilometers).

    The cost of each route is stored in column 'traveltime', 'walktime'
    or 'kilomts', according to impedance.

    Returns output. Raises ValueError when a request does not return
    HTTP 200 or when the answer has no routes.
    """

    import requests
    import pandas as pd
    from glass.cons.esri import rest_token, CF_URL
    from glass.g.it.esri import json_to_gjson
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import df_to_shp
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.prj.obj import df_prj
    from glass.g.it.pd import df_to_geodf
    from glass.g.it.pd import json_obj_to_geodf
    from glass.cons.esri import get_tv_by_impedancetype

    # Get API token
    token = rest_token()

    # Data to Pandas DataFrames
    fdf = shp_to_obj(facilities)
    idf = shp_to_obj(incidents)

    # Re-project to WGS84
    fdf = df_prj(fdf, 4326)
    idf = df_prj(idf, 4326)

    # Geometries to Str - inputs for requests
    fdf['coords'] = fdf.geometry.x.astype(str) + ',' + fdf.geometry.y.astype(
        str)
    idf['coords'] = idf.geometry.x.astype(str) + ',' + idf.geometry.y.astype(
        str)

    # Delete geometry from incidents DF
    # (the result geometry will be the one of the routes)
    idf.drop(['geometry'], axis=1, inplace=True)

    # Split data
    # ArcGIS API only accepts 100 facilities
    # and 100 incidents in each request
    fdfs = df_split(fdf, 100, nrows=True) if fdf.shape[0] > 100 else [fdf]
    idfs = df_split(idf, 100, nrows=True) if idf.shape[0] > 100 else [idf]

    # Every chunk needs a 0..n-1 index: routes are related with incidents
    # by position. drop=True discards the old index whatever its name is
    # and never touches an existing 'index' column.
    idfs = [chunk.reset_index(drop=True) for chunk in idfs]
    fdfs = [chunk.reset_index(drop=True) for chunk in fdfs]

    # Get travel mode
    tv = get_tv_by_impedancetype(impedance)

    # Column with the cost of interest and its name in the result
    if impedance == 'WalkTime':
        src_col, tv_col = 'Total_WalkTime', 'walktime'

    elif impedance == 'metric':
        src_col, tv_col = 'Total_Kilometers', 'kilomts'

    else:
        src_col, tv_col = 'Total_TravelTime', 'traveltime'

    rn_cols = {src_col: tv_col}

    # Service columns we do not want in the result;
    # the cost columns not asked for are also removed
    drop_cols = [
        'ObjectID', 'FacilityID', 'FacilityRank', 'Name',
        'IncidentCurbApproach', 'FacilityCurbApproach', 'IncidentID',
        'StartTime', 'EndTime', 'StartTimeUTC', 'EndTimeUTC', 'Total_Minutes',
        'Total_TruckMinutes', 'Total_TruckTravelTime', 'Total_Miles'
    ]
    drop_cols.extend(
        c for c in ('Total_Kilometers', 'Total_WalkTime', 'Total_TravelTime')
        if c != src_col)

    # Parameters shared by every request
    # NOTE(review): the ESRI REST documentation names this parameter
    # 'travelMode'; 'travelModel' is kept as it was - confirm
    base_params = {
        'token': token,
        'f': 'json',
        'travelModel': tv,
        'defaultTargetFacilityCount': '1',
        'returnCFRoutes': True,
        'travelDirection': 'esriNATravelDirectionToFacility',
        'impedanceAttributeName': impedance
    }

    # Ask for results
    results = []

    for i_df in idfs:
        incidents_str = i_df.coords.str.cat(sep=';')

        for f_df in fdfs:
            facilities_str = f_df.coords.str.cat(sep=';')

            # Make request
            r = requests.get(CF_URL,
                             params=dict(base_params,
                                         facilities=facilities_str,
                                         incidents=incidents_str))

            if r.status_code != 200:
                raise ValueError('Error when requesting from: {}'.format(
                    str(r.url)))

            # Fail with a clear message when the answer has no routes
            # (e.g. the service answered with an error object)
            esri_geom = r.json()
            routes = esri_geom.get('routes')

            if routes is None:
                raise ValueError('No routes returned by {}: {}'.format(
                    str(CF_URL), str(esri_geom.get('error'))))

            # Convert ESRI json to GeoJson
            geom = json_to_gjson(routes)

            # GeoJSON to GeoDataFrame
            gdf = json_obj_to_geodf(geom, 4326)

            # Delete unwanted columns
            gdf.drop(drop_cols, axis=1, inplace=True)

            # Rename some interest columns
            gdf.rename(columns=rn_cols, inplace=True)

            # Add to results original attributes of incidents
            # NOTE(review): routes and incidents are related by row
            # position; this assumes one route per incident, returned in
            # the incidents order - confirm
            r_df = gdf.merge(i_df,
                             how='left',
                             left_index=True,
                             right_index=True)

            results.append(r_df)

    # Compute final result
    # Put every DataFrame in a single DataFrame
    fgdf = merge_df(results)

    # Since facilities were divided
    # The same incident has several "nearest" facilities
    # We just want one nearest facility
    # Lets group by using min operator
    gpdf = pd.DataFrame(fgdf.groupby([incidents_id]).agg({tv_col: 'min'
                                                          })).reset_index()

    gpdf.rename(columns={incidents_id: 'iid', tv_col: 'impcol'}, inplace=True)

    # Recovery geometry
    fgdf = fgdf.merge(gpdf, how='left', left_on=incidents_id, right_on='iid')
    fgdf = fgdf[fgdf[tv_col] == fgdf.impcol]
    fgdf = df_to_geodf(fgdf, 'geometry', 4326)

    # Remove repeated units
    # (several routes of one incident may share the minimum cost)
    g = fgdf.groupby('iid')
    fgdf['rn'] = g[tv_col].rank(method='first')
    fgdf = fgdf[fgdf.rn == 1]

    fgdf.drop(['iid', 'rn'], axis=1, inplace=True)

    # Re-project to original SRS
    epsg = get_shp_epsg(facilities)
    fgdf = df_prj(fgdf, epsg)

    # Export result
    df_to_shp(fgdf, output)

    return output