Example #1
def dsn_data_collection_by_multibuffer(inBuffers, workspace, db, datasource,
                                       keywords=None):
    """
    Extract Digital Social Network data for each sub-buffer of a main buffer.
    A sub-buffer is a buffer whose radius equals half the main buffer radius,
    centred at the North, South, East, West, Northeast, Northwest,
    Southwest and Southeast of the main buffer's central point.
    
    inBuffers = {
        "lisbon"    : {
            'x'      : -89004.994779, # in meters
            'y'      : -102815.866054, # in meters
            'radius' : 10000,
            'epsg'   : 3763
        },
        "london     : {
            'x'      : -14210.551441, # in meters
            'y'      : 6711542.47559, # in meters
            'radius' : 10000,
            'epsg'   : 3857
        }
    }
    or
    inBuffers = {
        "lisbon" : {
            "path" : /path/to/file.shp,
            "epsg" : 3763
        }
    }
    
    keywords = ['flood', 'accident', 'fire apartment', 'graffiti', 'homeless']
    
    datasource = 'facebook' or datasource = 'flickr'
    TODO: Only works for Flickr and Facebook
    """
    
    import os
    import pandas as pd
    from osgeo import ogr
    from gasp.pyt         import obj_to_lst
    from gasp.sql.db      import create_db
    from gasp.sql.to      import q_to_ntbl
    from gasp.sql.to      import df_to_db
    from gasp.gql.to      import shp_to_psql
    from gasp.gt.toshp    import df_to_shp
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.prox.bf  import get_sub_buffers, dic_buffer_array_to_shp
    
    if datasource == 'flickr':
        from gasp.sde.dsn.flickr import photos_location
    
    elif datasource == 'facebook':
        from gasp.sde.dsn.fb.places import places_by_query
    
    keywords = obj_to_lst(keywords)
    keywords = ["None"] if not keywords else keywords
    
    # Create Database to Store Data
    create_db(db, overwrite=True, api='psql')
    
    for city in inBuffers:
        # Get Smaller Buffers
        if "path" in inBuffers[city]:
            # Get X, Y and Radius
            from gasp.gt.prop.feat.bf import bf_prop
            
            __bfprop = bf_prop(
                inBuffers[city]["path"], inBuffers[city]["epsg"], isFile=True
            )
            
            inBuffers[city]["x"]      = __bfprop["X"]
            inBuffers[city]["y"]      = __bfprop["Y"]
            inBuffers[city]["radius"] = __bfprop["R"]
        
        inBuffers[city]["list_buffer"] = [{
            'X' : inBuffers[city]["x"], 'Y' : inBuffers[city]["y"],
            'RADIUS' : inBuffers[city]['radius'], 'cardeal' : 'major'
        }] + get_sub_buffers(
            inBuffers[city]["x"], inBuffers[city]["y"],
            inBuffers[city]["radius"]
        )
        
        # Smaller Buffers to File
        multiBuffer = os.path.join(workspace, 'buffers_{}.shp'.format(city))
        dic_buffer_array_to_shp(
            inBuffers[city]["list_buffer"], multiBuffer,
            inBuffers[city]['epsg'], fields={'cardeal' : ogr.OFTString}
        )
        
        # Retrieve data for each keyword and buffer
        # Record these elements in one dataframe
        tblData = None
        for bf in inBuffers[city]["list_buffer"]:
            for k in keywords:
                if datasource == 'flickr':
                    tmpData = photos_location(
                        bf, inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsg_out=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False
                    )
                
                elif datasource == 'facebook':
                    tmpData = places_by_query(
                        bf, inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsgOut=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False
                    )
                
                if isinstance(tmpData, int):
                    print("No data found for buffer '{}' and keyword '{}'".format(
                        bf['cardeal'], k
                    ))
                    
                    continue
                
                tmpData["keyword"]   = k
                tmpData["buffer_or"] = bf["cardeal"]
                
                if tblData is None:
                    tblData = tmpData
                else:
                    # DataFrame.append was removed in pandas 2.0; use concat
                    tblData = pd.concat([tblData, tmpData], ignore_index=True)
        
        inBuffers[city]["data"] = tblData
        
        # Get data column names
        cols = inBuffers[city]["data"].columns.values
        dataColumns = [
            c for c in cols if c not in ('geom', 'geometry', 'keyword', 'buffer_or')
        ]
        
        # Send data to PostgreSQL
        cgeom = 'geometry' if 'geometry' in cols else 'geom'
        
        inBuffers[city]["table"] = 'tbldata_{}'.format(city)
        
        df_to_db(
            db, inBuffers[city]["data"],
            inBuffers[city]["table"], api='psql',
            epsg=inBuffers[city]["epsg"], geomType='POINT', colGeom=cgeom
        )
        
        # Send Buffers data to PostgreSQL
        inBuffers[city]["pg_buffer"] = shp_to_psql(
            db, multiBuffer, pgTable='buffers_{}'.format(city),
            api="shp2pgsql", srsEpsgCode=inBuffers[city]["epsg"]
        )
        
        inBuffers[city]["filter_table"] = q_to_ntbl(
            db, "filter_{}".format(inBuffers[city]["table"]), (
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
            ).format(
                cols    = ", ".join(dataColumns),
                pgtable = inBuffers[city]["table"],
                bftable = inBuffers[city]["pg_buffer"]
            ), api='psql'
        )
        
        inBuffers[city]["outside_table"] = q_to_ntbl(
            db, "outside_{}".format(inBuffers[city]["table"]), (
                "SELECT * FROM ("
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS not_intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS NOT TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
                ") AS foo WHERE array_length(not_intersect_buffer, 1) = 9"
            ).format(
                cols    = ", ".join(dataColumns),
                pgtable = inBuffers[city]["table"],
                bftable = inBuffers[city]["pg_buffer"]
            ), api='psql'
        )
        
        # Union these two tables
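        # (array_symbol injects a literal '{}' - the PostgreSQL empty-array
        # literal - into the query below)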
        inBuffers[city]["table"] = q_to_ntbl(db, "data_{}".format(city), (
            "SELECT * FROM {intbl} UNION ALL "
            "SELECT {cols}, keyword, geom, extracted_buffer, "
            "CASE WHEN array_length(not_intersect_buffer, 1) = 9 "
            "THEN '{array_symbol}' ELSE not_intersect_buffer END AS "
            "intersect_buffer FROM {outbl}"
        ).format(
            intbl        = inBuffers[city]["filter_table"],
            outbl        = inBuffers[city]["outside_table"],
            cols         = ", ".join(dataColumns),
            array_symbol = '{' + '}'
        ), api='psql')
        
        """
        Get Buffers table with info related:
        -> pnt_obtidos = nr pontos obtidos usando esse buffer
        -> pnt_obtidos_fora = nt pontos obtidos fora desse buffer, mas 
        obtidos com ele
        -> pnt_intersect = nt pontos que se intersectam com o buffer
        -> pnt_intersect_non_obtain = nr pontos que se intersectam mas nao 
        foram obtidos como buffer
        """
        inBuffers[city]["pg_buffer"] = q_to_ntbl(
            db, "dt_{}".format(inBuffers[city]["pg_buffer"]), (
                "SELECT main.*, get_obtidos.pnt_obtidos, "
                "obtidos_fora.pnt_obtidos_fora, intersecting.pnt_intersect, "
                "int_not_obtained.pnt_intersect_non_obtain "
                "FROM {bf_table} AS main "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.extracted_buffer) "
                    "GROUP BY gid, cardeal"
                ") AS get_obtidos ON main.gid = get_obtidos.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos_fora "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.extracted_buffer) "
                    "WHERE ST_Intersects(bf.geom, dt.geom) IS NOT TRUE "
                    "GROUP BY gid, cardeal"
                ") AS obtidos_fora ON main.gid = obtidos_fora.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.intersect_buffer) "
                    "GROUP BY gid, cardeal"
                ") AS intersecting ON main.gid = intersecting.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect_non_obtain "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.intersect_buffer) "
                    "WHERE NOT (bf.cardeal = ANY(dt.extracted_buffer)) "
                    "GROUP BY gid, cardeal"
                ") AS int_not_obtained "
                "ON main.gid = int_not_obtained.gid "
                "ORDER BY main.gid"
            ).format(
                bf_table = inBuffers[city]["pg_buffer"],
                dt_table = inBuffers[city]["table"]
            ), api='psql'
        )
        
        """
        Get Points table with info related:
        -> nobtido = n vezes um ponto foi obtido
        -> obtido_e_intersect = n vezes um ponto foi obtido usando um buffer 
        com o qual se intersecta
        -> obtido_sem_intersect = n vezes um ponto foi obtido usando um buffer
        com o qual nao se intersecta
        -> nintersect = n vezes que um ponto se intersecta com um buffer
        -> intersect_sem_obtido = n vezes que um ponto nao foi obtido apesar
        de se intersectar com o buffer
        """
        inBuffers[city]["table"] = q_to_ntbl(
            db, "info_{}".format(city), (
                "SELECT {cols}, dt.keyword, dt.geom, "
                "CAST(dt.extracted_buffer AS text) AS extracted_buffer, "
                "CAST(dt.intersect_buffer AS text) AS intersect_buffer, "
                "array_length(extracted_buffer, 1) AS nobtido, "
                "SUM(CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                    "THEN 1 ELSE 0 END) AS obtido_e_intersect, "
                "(array_length(extracted_buffer, 1) - SUM("
                    "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                    "THEN 1 ELSE 0 END)) AS obtido_sem_intersect, "
                "array_length(intersect_buffer, 1) AS nintersect, "
                "(array_length(intersect_buffer, 1) - SUM("
                    "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                    "THEN 1 ELSE 0 END)) AS intersect_sem_obtido "
                "FROM {dt_table} AS dt "
                "INNER JOIN {bf_table} AS bf "
                "ON bf.cardeal = ANY(dt.extracted_buffer) "
                "GROUP BY {cols}, dt.keyword, dt.geom, "
                "dt.extracted_buffer, dt.intersect_buffer"
            ).format(
                dt_table = inBuffers[city]["table"],
                bf_table = inBuffers[city]["pg_buffer"],
                cols     = ", ".join(["dt.{}".format(x) for x in dataColumns])
            ), api='psql'
        )
        
        # Export Results
        dbtbl_to_shp(
            db, inBuffers[city]["table"], 'geom',
            os.path.join(workspace, "{}.shp".format(inBuffers[city]["table"])),
            api='psql', epsg=inBuffers[city]["epsg"]
        )
        
        dbtbl_to_shp(
            db, inBuffers[city]["pg_buffer"], 'geom',
            os.path.join(workspace, "{}.shp".format(inBuffers[city]["pg_buffer"])),
            api='psql', epsg=inBuffers[city]["epsg"]
        )
    
    return inBuffers
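
A minimal usage sketch (the workspace path, database name and keyword list are hypothetical; the buffer definition mirrors the docstring above):

# Collect Flickr data about floods and fires around Lisbon
bfs = {
    "lisbon" : {
        'x'      : -89004.994779,
        'y'      : -102815.866054,
        'radius' : 10000,
        'epsg'   : 3763
    }
}
res = dsn_data_collection_by_multibuffer(
    bfs, '/tmp/dsn_work', 'dsn_db', 'flickr',
    keywords=['flood', 'fire']
)
# res['lisbon']['table'] holds the name of the final PostgreSQL table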
Example #2
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell
    """
    
    import time
    from gasp.gt.fmshp          import shp_to_obj
    from gasp.sql.db            import create_db
    from gasp.sde.dsn.fb.places import places_by_query
    from gasp.g.prj             import df_prj
    from gasp.pyt.df.to         import merge_df
    from gasp.gt.toshp.db       import dbtbl_to_shp
    from gasp.sql.to            import q_to_ntbl
    from gasp.sql.to            import df_to_db
    
    # Open GRID SHP
    GRID_DF = shp_to_obj(GRID_PNT)
    GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF
    
    GRID_DF["lng"]     = GRID_DF.geometry.x.astype(float)
    GRID_DF["lat"]     = GRID_DF.geometry.y.astype(float)
    GRID_DF["grid_id"] = GRID_DF.index
    
    # GET DATA
    RESULTS = []
    def get_data(row, datasrc):
        if datasrc == 'facebook':
            d = places_by_query(
                {'x' : row.lng, 'y' : row.lat, 'r' : RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )
        
        else:
            raise ValueError('{} as datasource is not a valid value'.format(datasrc))
        
        if isinstance(d, int):
            return
        
        d['grid_id'] = row.grid_id
        
        RESULTS.append(d)
        
        time.sleep(5)  # throttle requests between cells
    
    GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1)
    
    RT = merge_df(RESULTS)
    
    # Create DB
    create_db(db, overwrite=True, api='psql')
    
    # Send Data to PostgreSQL
    df_to_db(
        db, RT, "{}_data".format(DATA_SOURCE),
        api='psql', epsg=EPSG, geomType="POINT",
        colGeom='geometry' if 'geometry' in RT.columns.values else 'geom'
    )
    
    COLS = [
        x for x in RT.columns.values
        if x not in ("geometry", "geom", "grid_id")
    ] + ["geom"]
    
    GRP_BY_TBL = q_to_ntbl(db, "{}_grpby".format(DATA_SOURCE), (
        "SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
        "FROM {dtsrc}_data GROUP BY {cols}"
    ).format(cols=", ".join(COLS), dtsrc=DATA_SOURCE), api='psql')
    
    dbtbl_to_shp(
        db, GRP_BY_TBL, "geom", OUTPUT_TABLE,
        api="psql", epsg=EPSG
    )
    
    return OUTPUT_TABLE
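
A hypothetical invocation (paths and radius are placeholders):

# Search Facebook places within 500 metres of every point of a grid
dsnsearch_by_cell(
    '/data/grid_pnt.shp', 3763, 500,
    'facebook', 'dsn_cell_db', '/data/fb_by_cell.shp'
)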
Example #3
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, DB,
                     GRASS_REGION_TEMPLATE):
    """
    Script to check differences between pairs of Feature Classes
    
    Suponha que temos diversas Feature Classes (FC) e que cada uma delas
    possui um determinado atributo; imagine tambem que,
    considerando todos os pares possiveis entre estas FC,
    se pretende comparar as diferencas na distribuicao dos valores
    desse atributo para cada par.
    
    * Dependencias:
    - GRASS;
    - PostgreSQL;
    - PostGIS.
    """

    import datetime
    import os
    import pandas
    from gasp.sql.fm import q_to_obj
    from gasp.to import db_to_tbl
    from gasp.sql.to import df_to_db
    from gasp.gt.toshp.cff import shp_to_shp
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.toshp.rst import rst_to_polyg
    from gasp.gql.to import shp_to_psql
    from gasp.gql.tomtx import tbl_to_area_mtx
    from gasp.gt.prop.ff import check_isRaster
    from gasp.pyt.oss import fprop
    from gasp.sql.db import create_db
    from gasp.sql.tbl import tbls_to_tbl
    from gasp.sql.to import q_to_ntbl
    from gasp.gql.cln import fix_geom

    # Create the output folder; abort if it already exists
    if not os.path.exists(OUT_FOLDER):
        from gasp.pyt.oss import mkdir
        mkdir(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    from gasp.gt.wenv.grs import run_grass
    # optimized_union_anls is used in the union loop below; the module path
    # here is an assumption based on the gasp package layout
    from gasp.gt.anls.ovlay import optimized_union_anls

    gbase = run_grass(OUT_FOLDER,
                      grassBIN='grass78',
                      location='shpdif',
                      srs=GRASS_REGION_TEMPLATE)

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

    from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp
    from gasp.gt.torst import rst_to_grs
    from gasp.gt.tbl.fld import rn_cols

    # Convert to SHAPE if file is Raster
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            # To GRASS
            rstName = fprop(s, 'fn')
            inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
            # To Vector
            d = rst_to_polyg(inRst,
                             rstName,
                             rstColumn="lulc_{}".format(i),
                             gisApi="grass")

            # Export Shapefile
            shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'), "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            # To GRASS
            grsV = shp_to_grs(s, fprop(s, 'fn'), asCMD=True)

            # Change name of column with comparing value
            ncol = "lulc_{}".format(i)
            rn_cols(grsV, {SHAPES_TO_COMPARE[s]: ncol}, api="grass")

            # Export
            shp = grs_to_shp(grsV, os.path.join(OUT_FOLDER, grsV + '_rn.shp'),
                             "area")

            _SHP_TO_COMPARE[shp] = ncol

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE
    __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database
    create_db(DB, api='psql')
    """ Union SHAPEs """

    UNION_SHAPE = {}
    FIX_GEOM = {}

    SHPS = list(__SHAPES_TO_COMPARE.keys())
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            # Optimized Union
            print("Union between {} and {}".format(SHPS[i], SHPS[e]))
            time_a = datetime.datetime.now().replace(microsecond=0)
            __unShp = optimized_union_anls(
                SHPS[i],
                SHPS[e],
                os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                GRASS_REGION_TEMPLATE,
                os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                multiProcess=True)
            time_b = datetime.datetime.now().replace(microsecond=0)
            print(time_b - time_a)

            # Rename cols
            unShp = rn_cols(
                __unShp, {
                    "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                    __SHAPES_TO_COMPARE[SHPS[i]],
                    "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                    __SHAPES_TO_COMPARE[SHPS[e]]
                })

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data to postgresql
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(DB, UNION_SHAPE[uShp], api='shp2pgsql')

        # Produce table with % of area equal in both maps
        areaMapTbl = q_to_ntbl(
            DB,
            "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
             "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
             "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
             "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
             "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
             "CASE "
             "WHEN {map1_cls} = {map2_cls} "
             "THEN 1 ELSE 0 "
             "END AS isthesame, total_area FROM {tbl}, ("
             "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
             ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                 lulc_1=fprop(uShp[0], 'fn'),
                 lulc_2=fprop(uShp[1], 'fn'),
                 map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                 map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                 tbl=union_tbl),
            api='psql')

        # Produce confusion matrix for the pair in comparison
        matrixTbl = tbl_to_area_mtx(DB, union_tbl,
                                    __SHAPES_TO_COMPARE[uShp[0]],
                                    __SHAPES_TO_COMPARE[uShp[1]],
                                    union_tbl + '_mtx')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(DB, [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL],
                              'total_table')

    # Create table with % of agreement between each pair of maps
    mapsNames = q_to_obj(
        DB,
        ("SELECT lulc FROM ("
         "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
         ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]
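
    # NOTE: crosstab() is provided by the PostgreSQL "tablefunc" extension,
    # which must be installed in the database (CREATE EXTENSION tablefunc)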

    Q = ("SELECT * FROM crosstab('"
         "SELECT CASE "
         "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
         "END AS lulc_1, CASE "
         "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
         "END AS lulc_2, CASE "
         "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
         "END AS agree_percentage FROM ("
         "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
         "SELECT lulc_1, lulc_2, {valCol} FROM ("
         "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
         "FROM {tbl}"
         ") AS tst"
         ") AS foo FULL JOIN ("
         "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
         "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_1, ("
         "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
         ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
         "ORDER BY lulc_1, lulc_2"
         "') AS ct("
         "lulc_map text, {crossCols}"
         ")")

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = q_to_ntbl(
                DB,
                "agreement_table",
                Q.format(tbl=total_table,
                         valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_) for map_ in mapsNames
                         ])),
                api='psql')

        else:
            TOTAL_AREA_TABLE = q_to_ntbl(DB,
                                         "area_table",
                                         Q.format(tbl=total_table,
                                                  valCol=f,
                                                  crossCols=", ".join([
                                                      "{} numeric".format(map_)
                                                      for map_ in mapsNames
                                                  ])),
                                         api='psql')

    # Union Mapping
    UNION_MAPPING = pandas.DataFrame(
        [[k[0], k[1], fprop(UNION_SHAPE[k], 'fn')] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(DB, UNION_MAPPING, 'union_map', api='psql')

    # Export Results
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(fprop(x[0], 'fn')[:15],
                       fprop(x[1], 'fn')[:15]) for x in SYNTH_TBL
    ]

    db_to_tbl(DB, ["SELECT * FROM {}".format(x) for x in TABLES],
              REPORT,
              sheetsNames=SHEETS,
              dbAPI='psql')

    return REPORT
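
A hypothetical call (paths are placeholders; the dict maps each file to the attribute column to compare):

maps = {
    '/data/lulc_map_a.shp' : 'lulc_cls',
    '/data/lulc_map_b.shp' : 'lulc_cls'
}
check_shape_diff(
    maps, '/tmp/shpdif', '/tmp/shpdif_report.xlsx',
    'shpdif_db', '/data/region_ref.tif'
)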
Example #4
File: to.py Project: jasp382/gasp
def psql_to_djgdb(sql_dumps,
                  db_name,
                  djg_proj=None,
                  mapTbl=None,
                  userDjgAPI=None):
    """
    Import PGSQL database in a SQL Script into the database
    controlled by one Django Project
    
    To work, the name of a model instance of type foreign key should be
    equal to the name of the 'db_column' clause.
    """

    import os
    from gasp import __import
    from gasp.pyt import obj_to_lst
    from gasp.sql.to import restore_tbls
    from gasp.sql.db import create_db, drop_db
    from gasp.sql.i import lst_tbl
    from gasp.sql.fm import q_to_obj
    from gasp.web.djg.mdl.rel import order_mdl_by_rel
    from gasp.web.djg.mdl.i import lst_mdl_proj

    # Global variables
    TABLES_TO_EXCLUDE = [
        'geography_columns', 'geometry_columns', 'spatial_ref_sys',
        'raster_columns', 'raster_overviews',
        'pointcloud_formats', 'pointcloud_columns'
    ]

    # Several SQL Files are expected
    sql_scripts = obj_to_lst(sql_dumps)

    # Create Database
    tmp_db_name = db_name + '_xxxtmp'
    create_db(tmp_db_name)

    # Restore tables in SQL files
    for sql in sql_scripts:
        restore_tbls(tmp_db_name, sql)

    # List tables in the database
    tables = lst_tbl(tmp_db_name, excludeViews=True, api='psql') \
        if not mapTbl else mapTbl

    # Open Django Project
    if djg_proj:
        from gasp.web.djg import open_Django_Proj
        application = open_Django_Proj(djg_proj)

    # List models in project
    app_mdls = lst_mdl_proj(djg_proj, thereIsApp=True, returnClassName=True)

    data_tbl = {}
    for t in tables:
        if t in ('auth_user', 'auth_group', 'auth_user_groups'):
            data_tbl[t] = t

        elif t.startswith('auth') or t.startswith('django'):
            continue

        elif t not in app_mdls or t in TABLES_TO_EXCLUDE:
            continue

        else:
            data_tbl["{}.models.{}".format(t.split('_')[0], app_mdls[t])] = t

    from django.contrib.gis.db import models
    mdl_cls = [
        "{}.models.{}".format(m.split('_')[0], app_mdls[m]) for m in app_mdls
    ]
    ordered_table = order_mdl_by_rel(mdl_cls)

    # Add default tables of Django
    def_djg_tbl = []
    if 'auth_group' in data_tbl:
        def_djg_tbl.append('auth_group')

    if 'auth_user' in data_tbl:
        def_djg_tbl.append('auth_user')

    if 'auth_user_groups' in data_tbl:
        def_djg_tbl.append('auth_user_groups')

    ordered_table = def_djg_tbl + ordered_table

    if userDjgAPI:
        for table in ordered_table:
            # Map pgsql table data
            tableData = q_to_obj(tmp_db_name, data_tbl[table], of='dict')

            # Table data to Django Model
            if table == 'auth_user':
                mdl_cls = __import('django.contrib.auth.models.User')
            elif table == 'auth_group':
                mdl_cls = __import('django.contrib.auth.models.Group')
            else:
                mdl_cls = __import(table)

            __mdl = mdl_cls()

            for row in tableData:
                for col in row:
                    # Check if field is a foreign key
                    field_obj = mdl_cls._meta.get_field(col)

                    if not isinstance(field_obj, models.ForeignKey):
                        # If not, use the value

                        # But first check if value is nan (special type of float)
                        if row[col] != row[col]:
                            row[col] = None

                        setattr(__mdl, col, row[col])

                    else:
                        # If yes, use the model instance of the related table
                        # (get the model of the table that the data being
                        # restored references)
                        related_name = field_obj.related_model.__name__
                        related_model = __import('{a}.models.{m}'.format(
                            a=table.split('_')[0], m=related_name))

                        # If NULL, continue
                        if not row[col]:
                            setattr(__mdl, col, row[col])
                            continue

                        related_obj = related_model.objects.get(
                            pk=int(row[col]))

                        setattr(__mdl, col, related_obj)
                __mdl.save()
    else:
        import json
        import pandas as pd
        from gasp.sql.fm import q_to_obj
        from gasp.sql.to import df_to_db

        for tbl in ordered_table:
            if tbl not in data_tbl:
                continue

            data = q_to_obj(tmp_db_name,
                            "SELECT * FROM {}".format(data_tbl[tbl]))

            if tbl == 'auth_user':
                data['last_login'] = pd.to_datetime(data.last_login, utc=True)
                data['date_joined'] = pd.to_datetime(data.date_joined,
                                                     utc=True)

            df_to_db(db_name, data, data_tbl[tbl], append=True)

    drop_db(tmp_db_name)
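
A hypothetical call (dump and project paths are placeholders):

psql_to_djgdb(
    '/backups/site_dump.sql', 'site_db',
    djg_proj='/srv/django/site_proj'
)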
Example #5
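Note: this function is an inner helper from a larger script; names such as create_db, df_to_db, mkdir, run_grass, os, gdal, np, pd, obs_id and del_file are defined in the enclosing scope.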
    def run_viewshed_by_cpu(tid,
                            db,
                            obs,
                            dem,
                            srs,
                            vis_basename='vis',
                            maxdst=None,
                            obselevation=None):
        # Create Database
        new_db = create_db("{}_{}".format(db, str(tid)), api='psql')

        # Points to Database
        pnt_tbl = df_to_db(new_db,
                           obs,
                           'pnt_tbl',
                           api='psql',
                           epsg=srs,
                           geomType='Point',
                           colGeom='geometry')

        # Create GRASS GIS Session
        workspace = mkdir(
            os.path.join(os.path.dirname(dem), 'work_{}'.format(str(tid))))
        loc_name = 'vis_loc'
        gbase = run_grass(workspace, location=loc_name, srs=dem)

        # Start GRASS GIS Session
        import grass.script as grass
        import grass.script.setup as gsetup
        gsetup.init(gbase, workspace, loc_name, 'PERMANENT')

        from gasp.gt.torst import rst_to_grs, grs_to_rst
        from gasp.gt.nop.surf import grs_viewshed
        from gasp.gt.deldt import del_rst

        # Send DEM to GRASS GIS
        grs_dem = rst_to_grs(dem, 'grs_dem', as_cmd=True)

        # Produce Viewshed for each point in obs
        for idx, row in obs.iterrows():
            # Get Viewshed raster
            vrst = grs_viewshed(grs_dem, (row.geometry.x, row.geometry.y),
                                '{}_{}'.format(vis_basename, str(row[obs_id])),
                                max_dist=maxdst,
                                obs_elv=obselevation)

            # Export Raster to File
            frst = grs_to_rst(vrst, os.path.join(workspace, vrst + '.tif'))

            # Raster to Array
            img = gdal.Open(frst)
            num = img.ReadAsArray()

            # Two Dimension to One Dimension
            # Reshape Array
            numone = num.reshape(num.shape[0] * num.shape[1])

            # Get Indexes with visibility
            visnum = np.arange(numone.shape[0]).astype(np.uint32)
            visnum = visnum[numone == 1]

            # Get row indexes (flat index // number of columns)
            visrow = visnum // num.shape[1]
            visrow = visrow.astype(np.uint32)

            # Get cols indexes
            viscol = visnum - (visrow * num.shape[1])

            # Visibility indexes to Pandas DataFrame
            idxnum = np.full(visrow.shape, row[obs_id])
            visdf = pd.DataFrame({
                'pntid': idxnum,
                'rowi': visrow,
                'coli': viscol
            })

            # Pandas DF to database
            # Create Visibility table
            df_to_db(new_db,
                     visdf,
                     vis_basename,
                     api='psql',
                     colGeom=None,
                     append=None if not idx else True)

            # Delete all variables
            numone = None
            visnum = None
            visrow = None
            viscol = None
            idxnum = None
            visdf = None
            del img

            # Delete GRASS GIS File
            del_rst(vrst)

            # Delete TIFF File
            del_file(frst)
            frst = None
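
The flat-index arithmetic above recovers 2-D raster positions from a row-major flattened array; a standalone sketch of the same idea (the sample array is made up):

import numpy as np

arr = np.zeros((3, 5), dtype=np.uint8)
arr[1, 4] = 1
arr[2, 0] = 1

# Flatten row-major: flat_index = row * n_cols + col
flat = arr.reshape(arr.shape[0] * arr.shape[1])
vis = np.arange(flat.shape[0])[flat == 1]

# Invert it: row = flat_index // n_cols, col = flat_index % n_cols
rows, cols = np.divmod(vis, arr.shape[1])
print(list(zip(rows, cols)))  # -> (1, 4) and (2, 0)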
Example #6
def shp_to_psql(dbname,
                shpData,
                pgTable=None,
                api="pandas",
                mapCols=None,
                srsEpsgCode=None,
                encoding="UTF-8",
                dbset='default'):
    """
    Send Shapefile to PostgreSQL
    
    if api is equal to "pandas" - GeoPandas API will be used;
    if api is equal to "shp2pgsql" - shp2pgsql tool will be used.
    """

    import os
    from gasp.pyt.oss import fprop
    from gasp.gt.prop.prj import get_epsg_shp

    # If defined, srsEpsgCode must be an integer value
    if srsEpsgCode and not isinstance(srsEpsgCode, int):
        raise ValueError('srsEpsgCode should be an integer value')

    if api == "pandas":
        from gasp.fm import tbl_to_obj
        from gasp.sql.to import df_to_db
        from gasp.gt.prop.feat import get_gtype

    elif api == "shp2pgsql":
        from gasp import exec_cmd
        from gasp.sql import psql_cmd
        from gasp.pyt.oss import del_file

    else:
        raise ValueError(
            'api value is not valid. options are: pandas and shp2pgsql')

    # Check if shp is folder
    if os.path.isdir(shpData):
        from gasp.pyt.oss import lst_ff

        shapes = lst_ff(shpData, file_format='.shp')

    else:
        from gasp.pyt import obj_to_lst

        shapes = obj_to_lst(shpData)

    epsgs = [get_epsg_shp(i)
             for i in shapes] if not srsEpsgCode else [srsEpsgCode]

    if None in epsgs:
        raise ValueError(
            ("Cannot obtain EPSG code. Use the srsEpsgCode parameter "
             "to specify the EPSG code of your data."))

    tables = []
    for _i in range(len(shapes)):
        # Get table name
        if not pgTable:
            tname = fprop(shapes[_i], 'fn', forceLower=True)
        elif isinstance(pgTable, list):
            tname = pgTable[_i]
        elif len(shapes) == 1:
            tname = pgTable
        else:
            tname = pgTable + '_{}'.format(_i + 1)

        # Import data
        if api == "pandas":
            # SHP to DataFrame
            df = tbl_to_obj(shapes[_i])

            if not mapCols:
                df.rename(columns={x: x.lower()
                                   for x in df.columns.values},
                          inplace=True)
            else:
                renameD = {
                    x : mapCols[x].lower() if x in mapCols else \
                    x.lower() for x in df.columns.values
                }
                df.rename(columns=renameD, inplace=True)

            if "geometry" in df.columns.values:
                geomCol = "geometry"

            elif "geom" in df.columns.values:
                geomCol = "geom"

            else:
                raise ValueError("No Geometry found in shp")

            # GeoDataFrame to PSQL
            df_to_db(dbname,
                     df,
                     tname,
                     append=True,
                     api='psql',
                     epsg=epsgs[_i] if not srsEpsgCode else srsEpsgCode,
                     colGeom=geomCol,
                     geomType=get_gtype(shapes[_i],
                                        name=True,
                                        py_cls=False,
                                        gisApi='ogr'))

        else:
            sql_script = os.path.join(os.path.dirname(shapes[_i]),
                                      tname + '.sql')

            cmd = ('shp2pgsql -I -s {epsg} -W {enc} '
                   '{shp} public.{name} > {out}').format(
                       epsg=epsgs[_i] if not srsEpsgCode else srsEpsgCode,
                       shp=shapes[_i],
                       name=tname,
                       out=sql_script,
                       enc=encoding)

            outcmd = exec_cmd(cmd)

            psql_cmd(dbname, sql_script, dbcon=dbset)

            del_file(sql_script)

        tables.append(tname)

    return tables[0] if len(tables) == 1 else tables
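
Two hypothetical calls (database and file names are placeholders):

# Single shapefile via the GeoPandas-based loader
tbl = shp_to_psql('gis_db', '/data/roads.shp', api='pandas')

# Every shapefile in a folder via shp2pgsql, forcing the SRS
tbls = shp_to_psql(
    'gis_db', '/data/shps', pgTable='layer',
    api='shp2pgsql', srsEpsgCode=3763
)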