Example #1
def db_to_db(db_a, db_b, typeDBA, typeDBB):
    """
    Copy all tables from one database to another
    
    Useful when we want to migrate an SQLite DB to a PostgreSQL DB
    
    typeDBA / typeDBB options:
    * sqlite
    * psql
    """
    
    from glass.g.wt.sql    import df_to_db
    from glass.ng.sql.q    import q_to_obj
    from glass.ng.prop.sql import lst_tbl
    from glass.ng.sql.db   import create_db
    
    # List Tables in DB A
    tbls = lst_tbl(db_a, excludeViews=True, api=typeDBA)
    
    # Create database B
    db_b = create_db(db_b, overwrite=False, api=typeDBB)
    
    # Table to Database B
    for tbl in tbls:
        df = q_to_obj(
            db_a, "SELECT * FROM {}".format(tbl), db_api=typeDBA
        )
        
        df_to_db(db_b, df, tbl, append=None, api=typeDBB)
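
A minimal usage sketch, assuming db_a is the path to an SQLite file and db_b is the name of the PostgreSQL database to create (both values are hypothetical):

# Copy every table of a local SQLite file into a new PostgreSQL database
db_to_db('/tmp/data.sqlite', 'target_db', 'sqlite', 'psql')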
Example #2
def tbl_fromdb_todb(from_db, to_db, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL tables from one database to another
    """
    
    from glass.pys      import obj_to_lst
    from glass.g.wt.sql import df_to_db
    
    api = api if api in ('pandas', 'psql') else 'pandas'
    
    tables = obj_to_lst(tables)
    
    if api == 'pandas':
        from glass.ng.sql.q import q_to_obj
    
        for table in tables:
            if qForTbl and table in qForTbl:
                tblDf = q_to_obj(from_db, qForTbl[table], db_api='psql')
        
            else:
                tblDf = q_to_obj(from_db, "SELECT * FROM {}".format(
                    table), db_api='psql')
        
            df_to_db(to_db, tblDf, table, api='psql')
    
    else:
        import os
        from glass.pys.oss     import mkdir, del_folder
        from glass.ng.sql.bkup import dump_tbls
        from glass.ng.sql.db   import restore_tbls
        
        tmpFolder = mkdir(
            os.path.dirname(os.path.abspath(__file__)), randName=True
        )
        
        # Dump 
        sqlScript = dump_tbls(from_db, tables, os.path.join(
            tmpFolder, "tables_data.sql"
        ))
            
        # Restore
        restore_tbls(to_db, sqlScript, tables)
        
        del_folder(tmpFolder)
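
A usage sketch under the assumption that both PostgreSQL databases already exist (names and tables are hypothetical); api='psql' switches from the pandas path to a dump/restore workflow that writes a temporary SQL file next to this module:

# Copy two tables between PostgreSQL databases via dump/restore
tbl_fromdb_todb('src_db', 'dst_db', ['roads', 'rails'], api='psql')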
Example #3
def apndtbl_in_otherdb(db_a, db_b, tblA, tblB, mapCols,
                       geomCol=None, srsEpsg=None, con_a=None, con_b=None):
    """
    Append data of one table to another table in another database.
    """
    
    from glass.ng.sql.q import q_to_obj
    from glass.g.wt.sql import df_to_db
    
    df = q_to_obj(db_a, "SELECT {} FROM {}".format(
        ", ".join(list(mapCols.keys())), tblA
    ), db_api='psql', geomCol=geomCol, epsg=srsEpsg, dbset=con_a)
    
    # Change Names
    df.rename(columns=mapCols, inplace=True)
    
    if geomCol:
        for k in mapCols:
            if geomCol == k:
                geomCol = mapCols[k]
                break
    
    # Get Geom Type
    # Send data to other database
    if geomCol and srsEpsg:
        from glass.g.prop.feat import get_gtype
        
        gType = get_gtype(df, geomCol=geomCol, gisApi='pandas')
        
        df_to_db(
            db_b, df, tblB, append=True, api='psql', epsg=srsEpsg,
            geomType=gType, colGeom=geomCol, dbset=con_b
        )
    
    else:
        df_to_db(db_b, df, tblB, append=True, api='psql', dbset=con_b)
    
    return tblB
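
A hedged usage sketch; mapCols maps the source columns (keys) to the target table's columns (values), and every name here is hypothetical:

apndtbl_in_otherdb(
    'db_src', 'db_dst', 'pnt_src', 'pnt_dst',
    {'old_id': 'new_id', 'geom': 'geom'},
    geomCol='geom', srsEpsg=3763
)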
Example #4
def name_circulations(db,
                      GTFS_SCHEMA,
                      OTHER_SCHEMA,
                      output,
                      other_db=None,
                      serviceSchema=None,
                      routeIdColName=None,
                      tripIdColName=None):
    """
    Get all circulations from a GTFS feed and associate them with
    metadata columns from another database
    
    GTFS_SCHEMA = {
        "TNAME"     : "stop_times",
        "TRIP"      : "trip_id",
        "STOP"      : "stop_id",
        "SEQUENCE"  : "stop_sequence",
        "DEPARTURE" : "departure_time"
    }
    
    OTHER_SCHEMA = {
        "TNAME"    : "percursos_geom_v2",
        "ROUTE"    : ["carreira", "variante", "sentido"],
        "SEQUENCE" : "ordem",
        "STOP"     : "paragem"
    }
    
    serviceSchema = {
        "TRIPS" : {
            "TNAME"   : "trips",
            "TRIP"    : "trip_id",
            "SERVICE" : "service_id"
        },
        "CALENDAR" : {
            "TNAME"   : "calendar_dates",
            "SERVICE" : "service_id",
            "DATE"    : "date"
        },
        "FILTER_DAY" : 20180308
    }
    """

    import os
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj

    other_db = db if not other_db else other_db

    # Sanitize Route ID in Other Schema
    OTHER_SCHEMA_ROUTE = obj_to_lst(OTHER_SCHEMA["ROUTE"])

    if len(OTHER_SCHEMA_ROUTE) > 1:
        from glass.ng.sql.col import txt_cols_to_col

        ROUTE_COL = routeIdColName if routeIdColName else "fid_route"

        txt_cols_to_col(other_db, OTHER_SCHEMA["TNAME"], OTHER_SCHEMA_ROUTE,
                        "|", ROUTE_COL)

    else:
        ROUTE_COL = routeIdColName if routeIdColName else \
            OTHER_SCHEMA_ROUTE[0]
    """
    Get all circulations in GTFS and their start time
    """
    if serviceSchema:
        serviceSchema["FILTER_DAY"] = obj_to_lst(serviceSchema["FILTER_DAY"])

    where = "" if not serviceSchema else (" WHERE {} ").format(" OR ".join([
        "{}.{} = {}".format(serviceSchema["CALENDAR"]["TNAME"],
                            serviceSchema["CALENDAR"]["DATE"], d)
        for d in serviceSchema["FILTER_DAY"]
    ]))

    injoinQ = "" if not serviceSchema else (
        "INNER JOIN ("
        "SELECT {tripsTbl}.{tripsTripId} "
        "FROM {tripsTbl} INNER JOIN {calenTbl} ON "
        "{tripsTbl}.{tripsServId} = {calenTbl}.{calenServId}{whr} "
        "GROUP BY {tripsTbl}.{tripsTripId}"
        ") AS trip_service ON {stopTimeTbl}.{stopTimeTrip} "
        "= trip_service.{tripsTripId} ").format(
            tripsTbl=serviceSchema["TRIPS"]["TNAME"],
            tripsTripId=serviceSchema["TRIPS"]["TRIP"],
            tripsServId=serviceSchema["TRIPS"]["SERVICE"],
            calenTbl=serviceSchema["CALENDAR"]["TNAME"],
            calenServId=serviceSchema["CALENDAR"]["SERVICE"],
            stopTimeTbl=GTFS_SCHEMA["TNAME"],
            stopTimeTrip=GTFS_SCHEMA["TRIP"],
            whr=where)

    newTripCol = tripIdColName if tripIdColName else GTFS_SCHEMA["TRIP"]

    Q = (
        "SELECT {stopTimesT}.{tripId} AS {newTrip}, "
        "array_agg({stopTimesT}.{stopId} "
        "ORDER BY {stopTimesT}.{tripId}, {stopTimesT}.{stopSq}) AS stops, "
        "array_agg({stopTimesT}.{stopSq} "
        "ORDER BY {stopTimesT}.{tripId}, {stopTimesT}.{stopSq}) AS stops_order, "
        "MIN({stopTimesT}.{depTime}) AS departure, "
        "MAX({stopTimesT}.{depTime}) AS depar_last_stop "
        "FROM {stopTimesT} {injoin}"
        "GROUP BY {stopTimesT}.{tripId}").format(
            tripId=GTFS_SCHEMA["TRIP"],
            stopId=GTFS_SCHEMA["STOP"],
            stopSq=GTFS_SCHEMA["SEQUENCE"],
            depTime=GTFS_SCHEMA["DEPARTURE"],
            stopTimesT=GTFS_SCHEMA["TNAME"],
            injoin=injoinQ,
            newTrip=newTripCol)

    circ = q_to_obj(db, Q)
    """
    Get all routes metadata in the "Other Database/Table"
    """
    Q = ("SELECT {idRoute}, "
         "array_agg({stopF} ORDER BY {idRoute}, {stopSq}) AS stops, "
         "array_agg({stopSq} ORDER BY {idRoute}, {stopSq}) AS stops_order "
         "FROM {t} GROUP BY {idRoute}").format(idRoute=ROUTE_COL,
                                               stopF=OTHER_SCHEMA["STOP"],
                                               stopSq=OTHER_SCHEMA["SEQUENCE"],
                                               t=OTHER_SCHEMA["TNAME"])

    routes = q_to_obj(other_db, Q)

    def sanitizeDf(df, col):
        df[col] = df[col].astype(str)
        df[col] = df[col].str.replace('L', '')
        df[col] = df[col].str.replace(' ', '')
        # '[' and ']' are regex metacharacters; replace them literally
        df[col] = df[col].str.replace('[', '', regex=False)
        df[col] = df[col].str.replace(']', '', regex=False)

        return df

    circ = sanitizeDf(circ, "stops")
    routes = sanitizeDf(routes, "stops")

    newDf = circ.merge(routes, how='inner', left_on="stops", right_on="stops")

    if os.path.dirname(output):
        # Write XLS
        from glass.ng.wt import obj_to_tbl

        obj_to_tbl(newDf, output)

    else:
        # Send to pgsql
        from glass.g.wt.sql import df_to_db

        df_to_db(db, newDf, output, api='psql')

    return output
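
A call sketch reusing the schema layout shown in the docstring (the database name and output path are hypothetical; an output with a directory component is written as a spreadsheet, otherwise it is treated as a table name):

gtfs_schema = {
    "TNAME"     : "stop_times",
    "TRIP"      : "trip_id",
    "STOP"      : "stop_id",
    "SEQUENCE"  : "stop_sequence",
    "DEPARTURE" : "departure_time"
}
other_schema = {
    "TNAME"    : "percursos_geom_v2",
    "ROUTE"    : ["carreira", "variante", "sentido"],
    "SEQUENCE" : "ordem",
    "STOP"     : "paragem"
}
name_circulations('gtfs_db', gtfs_schema, other_schema, '/tmp/circulations.xlsx')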
Example #5
def tbl_to_db(tblFile, db, sqlTbl, delimiter=None, encoding_='utf-8',
              sheet=None, isAppend=None, api_db='psql', colsMap=None):
    """
    Table file to Database Table
    
    APIs available:
    * psql;
    * sqlite;
    """
    
    import os
    from glass.pys     import obj_to_lst
    from glass.pys.oss import fprop
    from glass.ng.rd   import tbl_to_obj
    from glass.g.wt.sql import df_to_db
    
    if os.path.isdir(tblFile):
        from glass.pys.oss import lst_ff
        
        tbls = lst_ff(tblFile)
    
    else:
        tbls = obj_to_lst(tblFile)
    
    outSQLTbl = obj_to_lst(sqlTbl)
    
    RTBL = []
    for i in range(len(tbls)):
        fp = fprop(tbls[i], ['fn', 'ff'])
        ff = fp['fileformat']
        fn = fp['filename']
    
        if ff == '.csv' or ff == '.txt' or ff == '.tsv':
            if not delimiter:
                raise ValueError((
                    "To convert TXT to DB table, you need to give a value for the "
                    "delimiter input parameter"
                ))
        
            __enc = 'utf-8' if not encoding_ else encoding_
        
            data = tbl_to_obj(
                tbls[i], _delimiter=delimiter, encoding_=__enc
            )
    
        elif ff == '.dbf':
            data = tbl_to_obj(tbls[i])
    
        elif ff == '.xls' or ff == '.xlsx':
            data = tbl_to_obj(tbls[i], sheet=sheet)
    
        elif ff == '.ods':
            if not sheet:
                raise ValueError((
                    "To convert ODS to DB table, you need to give a value "
                    "for the sheet input parameter"
                ))
        
            data = tbl_to_obj(tbls[i], sheet=sheet)
    
        else:
            raise ValueError('{} is not a valid table format!'.format(ff))
        
        if colsMap:
            data.rename(columns=colsMap, inplace=True)
    
        # Send data to database
        out_tbl = fn if not outSQLTbl else outSQLTbl[i] \
            if i+1 <= len(outSQLTbl) else fn
        _rtbl = df_to_db(
            db, data, out_tbl,
            append=isAppend, api=api_db
        )
        
        RTBL.append(_rtbl)
    
    return RTBL[0] if len(RTBL) == 1 else RTBL
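
A usage sketch for a delimited text file (path, database and table name are hypothetical; delimiter is mandatory for .csv/.txt/.tsv inputs):

tbl_to_db('/tmp/stops.csv', 'my_db', 'stops', delimiter=';', api_db='psql')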
Example #6
def dsn_data_collection_by_multibuffer(inBuffers,
                                       workspace,
                                       db,
                                       datasource,
                                       keywords=None):
    """
    Extract Digital Social Network Data for each sub-buffer in buffer.
    A sub-buffer is a buffer with a radius equals to the main buffer radius /2
    and with a central point at North, South, East, West, Northeast, Northwest,
    Southwest and Southeast of the main buffer central point.
    
    inBuffers = {
        "lisbon"    : {
            'x'      : -89004.994779, # in meters
            'y'      : -102815.866054, # in meters
            'radius' : 10000,
            'epsg'   : 3763
        },
        "london     : {
            'x'      : -14210.551441, # in meters
            'y'      : 6711542.47559, # in meters
            'radius' : 10000,
            'epsg'   : 3857
        }
    }
    or
    inBuffers = {
        "lisbon" : {
            "path" : /path/to/file.shp,
            "epsg" : 3763
        }
    }
    
    keywords = ['flood', 'accident', 'fire apartment', 'graffiti', 'homeless']
    
    datasource = 'facebook' or datasource = 'flickr'
    TODO: Only works for Flickr and Facebook
    """

    import os
    import pandas as pd
    from osgeo import ogr
    from glass.pys import obj_to_lst
    from glass.ng.sql.db import create_db
    from glass.ng.sql.q import q_to_ntbl
    from glass.g.wt.sql import df_to_db
    from glass.g.it.db import shp_to_psql
    from glass.g.it.shp import dbtbl_to_shp
    from glass.g.gp.prox.bfing import get_sub_buffers, dic_buffer_array_to_shp

    if datasource == 'flickr':
        from glass.g.acq.dsn.flickr import photos_location

    elif datasource == 'facebook':
        from glass.g.acq.dsn.fb.places import places_by_query

    keywords = obj_to_lst(keywords)
    keywords = ["None"] if not keywords else keywords

    # Create Database to Store Data
    create_db(db, overwrite=True, api='psql')

    for city in inBuffers:
        # Get Smaller Buffers
        if "path" in inBuffers[city]:
            # Get X, Y and Radius
            from glass.g.prop.feat.bf import bf_prop

            __bfprop = bf_prop(inBuffers[city]["path"],
                               inBuffers[city]["epsg"],
                               isFile=True)

            inBuffers[city]["x"] = __bfprop["X"]
            inBuffers[city]["y"] = __bfprop["Y"]
            inBuffers[city]["radius"] = __bfprop["R"]

        inBuffers[city]["list_buffer"] = [{
            'X': inBuffers[city]["x"],
            'Y': inBuffers[city]["y"],
            'RADIUS': inBuffers[city]['radius'],
            'cardeal': 'major'
        }] + get_sub_buffers(inBuffers[city]["x"], inBuffers[city]["y"],
                             inBuffers[city]["radius"])

        # Smaller Buffers to File
        multiBuffer = os.path.join(workspace, 'buffers_{}.shp'.format(city))
        dic_buffer_array_to_shp(inBuffers[city]["list_buffer"],
                                multiBuffer,
                                inBuffers[city]['epsg'],
                                fields={'cardeal': ogr.OFTString})

        # Retrieve data for each keyword and buffer
        # Record these elements in one dataframe
        c = None
        tblData = None
        for bf in inBuffers[city]["list_buffer"]:
            for k in keywords:
                if datasource == 'flickr':
                    tmpData = photos_location(
                        bf,
                        inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsg_out=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False)

                elif datasource == 'facebook':
                    tmpData = places_by_query(
                        bf,
                        inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsgOut=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False)

                if type(tmpData) == int:
                    print("NoData finded for buffer '{}' and keyword '{}'".
                          format(bf['cardeal'], k))

                    continue

                tmpData["keyword"] = k
                tmpData["buffer_or"] = bf["cardeal"]

                if not c:
                    tblData = tmpData
                    c = 1
                else:
                    # DataFrame.append was removed in pandas 2.0; use concat
                    tblData = pd.concat([tblData, tmpData], ignore_index=True)

        inBuffers[city]["data"] = tblData

        # Get data columns names
        cols = inBuffers[city]["data"].columns.values
        dataColumns = [
            c for c in cols if c != 'geom' and c != 'keyword' \
            and c != 'buffer_or' and c != 'geometry'
        ]

        # Send data to PostgreSQL
        if 'geometry' in cols:
            cgeom = 'geometry'

        else:
            cgeom = 'geom'

        inBuffers[city]["table"] = 'tbldata_{}'.format(city)

        df_to_db(db,
                 inBuffers[city]["data"],
                 inBuffers[city]["table"],
                 api='psql',
                 epsg=inBuffers[city]["epsg"],
                 geomType='POINT',
                 colGeom=cgeom)

        # Send Buffers data to PostgreSQL
        inBuffers[city]["pg_buffer"] = shp_to_psql(
            db,
            multiBuffer,
            pgTable='buffers_{}'.format(city),
            api="shp2pgsql",
            srsEpsgCode=inBuffers[city]["epsg"])

        inBuffers[city]["filter_table"] = q_to_ntbl(
            db,
            "filter_{}".format(inBuffers[city]["table"]),
            ("SELECT srcdata.*, "
             "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
             "AS intersect_buffer FROM ("
             "SELECT {cols}, keyword, geom, "
             "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
             "FROM {pgtable} "
             "GROUP BY {cols}, keyword, geom"
             ") AS srcdata, ("
             "SELECT cardeal, geom AS bfg FROM {bftable}"
             ") AS buffersg "
             "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS TRUE "
             "GROUP BY {cols}, keyword, geom, extracted_buffer").format(
                 cols=", ".join(dataColumns),
                 pgtable=inBuffers[city]["table"],
                 bftable=inBuffers[city]["pg_buffer"]),
            api='psql')

        inBuffers[city]["outside_table"] = q_to_ntbl(
            db,
            "outside_{}".format(inBuffers[city]["table"]),
            ("SELECT * FROM ("
             "SELECT srcdata.*, "
             "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
             "AS not_intersect_buffer FROM ("
             "SELECT {cols}, keyword, geom, "
             "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
             "FROM {pgtable} "
             "GROUP BY {cols}, keyword, geom"
             ") AS srcdata, ("
             "SELECT cardeal, geom AS bfg FROM {bftable}"
             ") AS buffersg "
             "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS NOT TRUE "
             "GROUP BY {cols}, keyword, geom, extracted_buffer"
             ") AS foo WHERE array_length(not_intersect_buffer, 1) = 9"
             ).format(cols=", ".join(dataColumns),
                      pgtable=inBuffers[city]["table"],
                      bftable=inBuffers[city]["pg_buffer"]),
            api='psql')

        # Union these two tables
        inBuffers[city]["table"] = q_to_ntbl(
            db,
            "data_{}".format(city),
            ("SELECT * FROM {intbl} UNION ALL "
             "SELECT {cols}, keyword, geom, extracted_buffer, "
             "CASE WHEN array_length(not_intersect_buffer, 1) = 9 "
             "THEN '{array_symbol}' ELSE not_intersect_buffer END AS "
             "intersect_buffer FROM {outbl}").format(
                 intbl=inBuffers[city]["filter_table"],
                 outbl=inBuffers[city]["outside_table"],
                 cols=", ".join(dataColumns),
                 array_symbol='{' + '}'),
            api='psql')
        """
        Get buffers table with related info:
        -> pnt_obtidos = number of points obtained using this buffer
        -> pnt_obtidos_fora = number of points obtained outside this buffer,
        but obtained with it
        -> pnt_intersect = number of points intersecting the buffer
        -> pnt_intersect_non_obtain = number of points that intersect the
        buffer but were not obtained with it
        """
        inBuffers[city]["pg_buffer"] = q_to_ntbl(
            db,
            "dt_{}".format(inBuffers[city]["pg_buffer"]),
            ("SELECT main.*, get_obtidos.pnt_obtidos, "
             "obtidos_fora.pnt_obtidos_fora, intersecting.pnt_intersect, "
             "int_not_obtained.pnt_intersect_non_obtain "
             "FROM {bf_table} AS main "
             "LEFT JOIN ("
             "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos "
             "FROM {bf_table} AS bf "
             "INNER JOIN {dt_table} AS dt "
             "ON bf.cardeal = ANY(dt.extracted_buffer) "
             "GROUP BY gid, cardeal"
             ") AS get_obtidos ON main.gid = get_obtidos.gid "
             "LEFT JOIN ("
             "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos_fora "
             "FROM {bf_table} AS bf "
             "INNER JOIN {dt_table} AS dt "
             "ON bf.cardeal = ANY(dt.extracted_buffer) "
             "WHERE ST_Intersects(bf.geom, dt.geom) IS NOT TRUE "
             "GROUP BY gid, cardeal"
             ") AS obtidos_fora ON main.gid = obtidos_fora.gid "
             "LEFT JOIN ("
             "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect "
             "FROM {bf_table} AS bf "
             "INNER JOIN {dt_table} AS dt "
             "ON bf.cardeal = ANY(dt.intersect_buffer) "
             "GROUP BY gid, cardeal"
             ") AS intersecting ON main.gid = intersecting.gid "
             "LEFT JOIN ("
             "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect_non_obtain "
             "FROM {bf_table} AS bf "
             "INNER JOIN {dt_table} AS dt "
             "ON bf.cardeal = ANY(dt.intersect_buffer) "
             "WHERE NOT (bf.cardeal = ANY(dt.extracted_buffer)) "
             "GROUP BY gid, cardeal"
             ") AS int_not_obtained "
             "ON main.gid = int_not_obtained.gid "
             "ORDER BY main.gid").format(bf_table=inBuffers[city]["pg_buffer"],
                                         dt_table=inBuffers[city]["table"]),
            api='psql')
        """
        Get points table with related info:
        -> nobtido = number of times a point was obtained
        -> obtido_e_intersect = number of times a point was obtained using a
        buffer it intersects
        -> obtido_sem_intersect = number of times a point was obtained using
        a buffer it does not intersect
        -> nintersect = number of times a point intersects a buffer
        -> intersect_sem_obtido = number of times a point was not obtained
        despite intersecting the buffer
        """
        inBuffers[city]["table"] = q_to_ntbl(
            db,
            "info_{}".format(city),
            ("SELECT {cols}, dt.keyword, dt.geom, "
             "CAST(dt.extracted_buffer AS text) AS extracted_buffer, "
             "CAST(dt.intersect_buffer AS text) AS intersect_buffer, "
             "array_length(extracted_buffer, 1) AS nobtido, "
             "SUM(CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
             "THEN 1 ELSE 0 END) AS obtido_e_intersect, "
             "(array_length(extracted_buffer, 1) - SUM("
             "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
             "THEN 1 ELSE 0 END)) AS obtido_sem_intersect, "
             "array_length(intersect_buffer, 1) AS nintersect, "
             "(array_length(intersect_buffer, 1) - SUM("
             "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
             "THEN 1 ELSE 0 END)) AS intersect_sem_obtido "
             "FROM {dt_table} AS dt "
             "INNER JOIN {bf_table} AS bf "
             "ON bf.cardeal = ANY(dt.extracted_buffer) "
             "GROUP BY {cols}, dt.keyword, dt.geom, "
             "dt.extracted_buffer, dt.intersect_buffer").format(
                 dt_table=inBuffers[city]["table"],
                 bf_table=inBuffers[city]["pg_buffer"],
                 cols=", ".join(["dt.{}".format(x) for x in dataColumns])),
            api='psql')

        # Export Results
        dbtbl_to_shp(db,
                     inBuffers[city]["table"],
                     'geom',
                     os.path.join(workspace,
                                  "{}.shp".format(inBuffers[city]["table"])),
                     api='psql',
                     epsg=inBuffers[city]["epsg"])

        dbtbl_to_shp(db,
                     inBuffers[city]["pg_buffer"],
                     'geom',
                     os.path.join(
                         workspace,
                         "{}.shp".format(inBuffers[city]["pg_buffer"])),
                     api='psql',
                     epsg=inBuffers[city]["epsg"])

    return inBuffers
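
A hedged sketch following the first inBuffers layout from the docstring (the coordinates are the docstring's own illustration; workspace and database names are hypothetical):

buffers = {
    "lisbon" : {
        'x'      : -89004.994779,
        'y'      : -102815.866054,
        'radius' : 10000,
        'epsg'   : 3763
    }
}
dsn_data_collection_by_multibuffer(
    buffers, '/tmp/dsn_work', 'dsn_db', 'flickr',
    keywords=['flood', 'fire']
)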
Example #7
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, DB,
                     GRASS_REGION_TEMPLATE):
    """
    Script to check differences between pairs of Feature Classes
    
    Suppose we have several Feature Classes (FC), each with a given
    attribute; suppose also that, considering every possible pair of
    these FCs, we want to compare the differences in the distribution
    of that attribute's values for each pair.
    
    * Dependencies:
    - GRASS;
    - PostgreSQL;
    - PostGIS.
    """

    import datetime
    import os
    import pandas
    from glass.ng.sql.q import q_to_obj
    from glass.ng.it import db_to_tbl
    from glass.g.wt.sql import df_to_db
    from glass.g.dp.rst.toshp import rst_to_polyg
    from glass.g.it.db import shp_to_psql
    from glass.g.dp.tomtx import tbl_to_area_mtx
    from glass.g.prop import check_isRaster
    from glass.pys.oss import fprop
    from glass.ng.sql.db import create_db
    from glass.ng.sql.tbl import tbls_to_tbl
    from glass.ng.sql.q import q_to_ntbl
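    # NOTE: optimized_union_anls is called below but is not imported in
    # this snippet; it is assumed to come from the glass package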

    # Check if folder exists, if not create it
    if not os.path.exists(OUT_FOLDER):
        from glass.pys.oss import mkdir
        mkdir(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    from glass.g.wenv.grs import run_grass

    gbase = run_grass(OUT_FOLDER,
                      grassBIN='grass78',
                      location='shpdif',
                      srs=GRASS_REGION_TEMPLATE)

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

    from glass.g.it.shp import shp_to_grs, grs_to_shp
    from glass.g.it.rst import rst_to_grs
    from glass.g.tbl.col import rn_cols

    # Convert to SHAPE if file is Raster
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            # To GRASS
            rstName = fprop(s, 'fn')
            inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
            # To Vector
            d = rst_to_polyg(inRst,
                             rstName,
                             rstColumn="lulc_{}".format(i),
                             gisApi="grass")

            # Export Shapefile
            shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'), "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            # To GRASS
            grsV = shp_to_grs(s, fprop(s, 'fn'), asCMD=True)

            # Change name of column with comparing value
            ncol = "lulc_{}".format(str(i))
            rn_cols(grsV, {SHAPES_TO_COMPARE[s]: "lulc_{}".format(str(i))},
                    api="grass")

            # Export
            shp = grs_to_shp(grsV, os.path.join(OUT_FOLDER, grsV + '_rn.shp'),
                             "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(str(i))

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE
    __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database
    create_db(DB, api='psql')
    """ Union SHAPEs """

    UNION_SHAPE = {}
    FIX_GEOM = {}

    SHPS = list(__SHAPES_TO_COMPARE.keys())
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            # Optimized Union
            print("Union between {} and {}".format(SHPS[i], SHPS[e]))
            time_a = datetime.datetime.now().replace(microsecond=0)
            __unShp = optimized_union_anls(
                SHPS[i],
                SHPS[e],
                os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                GRASS_REGION_TEMPLATE,
                os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                multiProcess=True)
            time_b = datetime.datetime.now().replace(microsecond=0)
            print(time_b - time_a)

            # Rename cols
            unShp = rn_cols(
                __unShp, {
                    "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                    __SHAPES_TO_COMPARE[SHPS[i]],
                    "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                    __SHAPES_TO_COMPARE[SHPS[e]]
                })

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data to postgresql
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(DB, UNION_SHAPE[uShp], api='shp2pgsql')

        # Produce table with % of area equal in both maps
        areaMapTbl = q_to_ntbl(
            DB,
            "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
             "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
             "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
             "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
             "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
             "CASE "
             "WHEN {map1_cls} = {map2_cls} "
             "THEN 1 ELSE 0 "
             "END AS isthesame, total_area FROM {tbl}, ("
             "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
             ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                 lulc_1=fprop(uShp[0], 'fn'),
                 lulc_2=fprop(uShp[1], 'fn'),
                 map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                 map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                 tbl=union_tbl),
            api='psql')

        # Produce confusion matrix for the pair in comparison
        matrixTbl = tbl_to_area_mtx(DB, union_tbl,
                                    __SHAPES_TO_COMPARE[uShp[0]],
                                    __SHAPES_TO_COMPARE[uShp[1]],
                                    union_tbl + '_mtx')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(DB, [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL],
                              'total_table')

    # Create table with % of agreement between each pair of maps
    mapsNames = q_to_obj(
        DB,
        ("SELECT lulc FROM ("
         "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
         ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    Q = ("SELECT * FROM crosstab('"
         "SELECT CASE "
         "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
         "END AS lulc_1, CASE "
         "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
         "END AS lulc_2, CASE "
         "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
         "END AS agree_percentage FROM ("
         "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
         "SELECT lulc_1, lulc_2, {valCol} FROM ("
         "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
         "FROM {tbl}"
         ") AS tst"
         ") AS foo FULL JOIN ("
         "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
         "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_1, ("
         "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
         ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
         "ORDER BY lulc_1, lulc_2"
         "') AS ct("
         "lulc_map text, {crossCols}"
         ")")

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = q_to_ntbl(
                DB,
                "agreement_table",
                Q.format(tbl=total_table,
                         valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_) for map_ in mapsNames
                         ])),
                api='psql')

        else:
            TOTAL_AREA_TABLE = q_to_ntbl(DB,
                                         "area_table",
                                         Q.format(tbl=total_table,
                                                  valCol=f,
                                                  crossCols=", ".join([
                                                      "{} numeric".format(map_)
                                                      for map_ in mapsNames
                                                  ])),
                                         api='psql')

    # Union Mapping
    UNION_MAPPING = pandas.DataFrame(
        [[k[0], k[1], fprop(UNION_SHAPE[k], 'fn')] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(DB, UNION_MAPPING, 'union_map', api='psql')

    # Export Results
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(fprop(x[0], 'fn')[:15],
                       fprop(x[1], 'fn')[:15]) for x in SYNTH_TBL
    ]

    db_to_tbl(DB, ["SELECT * FROM {}".format(x) for x in TABLES],
              REPORT,
              sheetsNames=SHEETS,
              dbAPI='psql')

    return REPORT
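
A usage sketch, assuming each input shapefile is mapped to the attribute whose distribution should be compared (paths and names are hypothetical; OUT_FOLDER must not exist yet, since the function raises otherwise):

check_shape_diff(
    {'/tmp/lulc_2015.shp': 'lulc_cls', '/tmp/lulc_2018.shp': 'lulc_cls'},
    '/tmp/shpdif_ws', '/tmp/report.xlsx', 'shpdif_db',
    '/tmp/ref_raster.tif'
)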
Example #8
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell
    """

    import time
    from glass.g.rd.shp import shp_to_obj
    from glass.ng.sql.db import create_db
    from glass.g.acq.dsn.fb.places import places_by_query
    from glass.g.prj.obj import df_prj
    from glass.ng.pd import merge_df
    from glass.g.it.shp import dbtbl_to_shp
    from glass.ng.sql.q import q_to_ntbl
    from glass.g.wt.sql import df_to_db

    # Open GRID SHP
    GRID_DF = shp_to_obj(GRID_PNT)
    GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF

    GRID_DF["lng"] = GRID_DF.geometry.x.astype(float)
    GRID_DF["lat"] = GRID_DF.geometry.y.astype(float)
    GRID_DF["grid_id"] = GRID_DF.index

    # GET DATA
    RESULTS = []

    def get_data(row, datasrc):
        if datasrc == 'facebook':
            d = places_by_query({
                'x': row.lng,
                'y': row.lat,
                'r': RADIUS
            },
                                4326,
                                keyword=None,
                                epsgOut=EPSG,
                                _limit='100',
                                onlySearchAreaContained=None)

        else:
            raise ValueError(
                '{} as datasource is not a valid value'.format(datasrc))

        if type(d) == int:
            return

        d['grid_id'] = row.grid_id

        RESULTS.append(d)

        time.sleep(5)

    GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1)

    RT = merge_df(RESULTS)

    # Create DB
    create_db(db, overwrite=True, api='psql')

    # Send Data to PostgreSQL
    # Pass epsg/geomType by keyword; positionally these values would land
    # on df_to_db's append/api parameters
    df_to_db(db,
             RT,
             "{}_data".format(DATA_SOURCE),
             api='psql',
             epsg=EPSG,
             geomType="POINT",
             colGeom='geometry' if 'geometry' in RT.columns.values else 'geom')

    COLS = [
        x for x in RT.columns.values if x != "geometry" and \
        x != 'geom' and x != "grid_id"
    ] + ["geom"]

    GRP_BY_TBL = q_to_ntbl(
        db,
        "{}_grpby".format(DATA_SOURCE),
        ("SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
         "FROM {dtsrc}_data GROUP BY {cols}").format(cols=", ".join(COLS),
                                                     dtsrc=DATA_SOURCE),
        api='psql')

    dbtbl_to_shp(db, GRP_BY_TBL, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG)

    return OUTPUT_TABLE
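
A hedged call sketch (grid shapefile, EPSG code, search radius in meters, database name and output shapefile are all hypothetical):

dsnsearch_by_cell(
    '/tmp/grid_pnt.shp', 3763, 500, 'facebook',
    'dsn_cells_db', '/tmp/fb_grouped.shp'
)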
Example #9
def psql_to_djgdb(sql_dumps,
                  db_name,
                  djg_proj=None,
                  mapTbl=None,
                  userDjgAPI=None):
    """
    Import a PGSQL database from a SQL script into the database
    controlled by a Django project
    
    For this to work, the name of a foreign-key model field must be
    equal to the name given in its 'db_column' clause.
    """

    import os
    from glass.pys import __import
    from glass.pys import obj_to_lst
    from glass.ng.sql.db import restore_tbls
    from glass.ng.sql.db import create_db, drop_db
    from glass.ng.prop.sql import lst_tbl
    from glass.ng.sql.q import q_to_obj
    from glass.webg.djg.mdl.rel import order_mdl_by_rel
    from glass.webg.djg.mdl.i import lst_mdl_proj

    # Global variables
    TABLES_TO_EXCLUDE = [
        'geography_columns', 'geometry_columns', 'spatial_ref_sys',
        'raster_columns', 'raster_columns', 'raster_overviews',
        'pointcloud_formats', 'pointcloud_columns'
    ]

    # Several SQL Files are expected
    sql_scripts = obj_to_lst(sql_dumps)

    # Create Database
    tmp_db_name = db_name + '_xxxtmp'
    create_db(tmp_db_name)

    # Restore tables in SQL files
    for sql in sql_scripts:
        restore_tbls(tmp_db_name, sql)

    # List tables in the database
    tables = [x for x in lst_tbl(tmp_db_name, excludeViews=True, api='psql')
              ] if not mapTbl else mapTbl

    # Open Django Project
    if djg_proj:
        from glass.webg.djg import get_djgprj
        application = get_djgprj(djg_proj)

    # List models in project
    app_mdls = lst_mdl_proj(djg_proj, thereIsApp=True, returnClassName=True)

    data_tbl = {}
    for t in tables:
        if t == 'auth_user' or t == 'auth_group' or t == 'auth_user_groups':
            data_tbl[t] = t

        elif t.startswith('auth') or t.startswith('django'):
            continue

        elif t not in app_mdls or t in TABLES_TO_EXCLUDE:
            continue

        else:
            data_tbl["{}.models.{}".format(t.split('_')[0], app_mdls[t])] = t

    from django.contrib.gis.db import models
    mdl_cls = [
        "{}.models.{}".format(m.split('_')[0], app_mdls[m]) for m in app_mdls
    ]
    orderned_table = order_mdl_by_rel(mdl_cls)

    # Add default tables of Django
    def_djg_tbl = []
    if 'auth_group' in data_tbl:
        def_djg_tbl.append('auth_group')

    if 'auth_user' in data_tbl:
        def_djg_tbl.append('auth_user')

    if 'auth_user_groups' in data_tbl:
        def_djg_tbl.append('auth_user_groups')

    orderned_table = def_djg_tbl + orderned_table

    if userDjgAPI:
        for table in orderned_table:
            # Map pgsql table data
            tableData = q_to_obj(tmp_db_name, data_tbl[table], of='dict')

            # Table data to Django Model
            if table == 'auth_user':
                mdl_cls = __import('django.contrib.auth.models.User')
            elif table == 'auth_group':
                mdl_cls = __import('django.contrib.auth.models.Group')
            else:
                mdl_cls = __import(table)

            __mdl = mdl_cls()

            for row in tableData:
                for col in row:
                    # Check if field is a foreign key
                    field_obj = mdl_cls._meta.get_field(col)

                    if not isinstance(field_obj, models.ForeignKey):
                        # If not, use the value

                        # But first check if value is nan (special type of float)
                        if row[col] != row[col]:
                            row[col] = None

                        setattr(__mdl, col, row[col])

                    else:
                        # If yes, use the model instance of the related table
                        # Get the model of the table related to the one
                        # whose data is being restored
                        related_name = field_obj.related_model.__name__
                        related_model = __import('{a}.models.{m}'.format(
                            a=table.split('_')[0], m=related_name))

                        # If NULL, continue
                        if not row[col]:
                            setattr(__mdl, col, row[col])
                            continue

                        related_obj = related_model.objects.get(
                            pk=int(row[col]))

                        setattr(__mdl, col, related_obj)
                __mdl.save()
    else:
        import pandas as pd
        from glass.ng.sql.q import q_to_obj
        from glass.g.wt.sql import df_to_db

        for tbl in orderned_table:
            if tbl not in data_tbl:
                continue

            data = q_to_obj(tmp_db_name,
                            "SELECT * FROM {}".format(data_tbl[tbl]))

            if tbl == 'auth_user':
                data['last_login'] = pd.to_datetime(data.last_login, utc=True)
                data['date_joined'] = pd.to_datetime(data.date_joined,
                                                     utc=True)

            df_to_db(db_name, data, data_tbl[tbl], append=True)

    drop_db(tmp_db_name)
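
A usage sketch, assuming one SQL dump and the path to a Django project (both hypothetical); the target database must already be configured in that project:

psql_to_djgdb('/tmp/backup.sql', 'djg_db', djg_proj='/home/user/my_django_proj')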
Example #10
    def run_viewshed_by_cpu(tid, db, obs, dem, srs,
        vis_basename='vis', maxdst=None, obselevation=None):
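        # NOTE: this nested worker relies on names from the enclosing scope
        # that the snippet does not show: os, gdal, np (numpy), pd (pandas),
        # obs_id (name of the observer ID column), plus create_db, df_to_db,
        # mkdir, run_grass, get_minmax_fm_seq_values and del_file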
        # Create Database
        new_db = create_db("{}_{}".format(db, str(tid)), api='psql')
        
        # Points to Database
        pnt_tbl = df_to_db(
            new_db, obs, 'pnt_tbl', api='psql', 
            epsg=srs, geomType='Point', colGeom='geometry')

        # Create GRASS GIS Session
        workspace = mkdir(os.path.join(
            os.path.dirname(dem), 'work_{}'.format(str(tid))
        ))
        loc_name = 'vis_loc'
        gbase = run_grass(workspace, location=loc_name, srs=dem)

        # Start GRASS GIS Session
        import grass.script as grass
        import grass.script.setup as gsetup
        gsetup.init(gbase, workspace, loc_name, 'PERMANENT')

        from glass.g.it.rst   import rst_to_grs, grs_to_rst
        from glass.g.rst.surf import grs_viewshed
        from glass.g.deldt    import del_rst

        # Send DEM to GRASS GIS
        grs_dem = rst_to_grs(dem, 'grs_dem', as_cmd=True)
    
        # Produce Viewshed for each point in obs
        for idx, row in obs.iterrows():
            # Get Viewshed raster
            vrst = grs_viewshed(
                grs_dem, (row.geometry.x, row.geometry.y),
                '{}_{}'.format(vis_basename, str(row[obs_id])),
                max_dist=maxdst, obs_elv=obselevation
            )
            
            # Export Raster to File
            frst = grs_to_rst(vrst, os.path.join(workspace, vrst + '.tif'))
            
            # Raster to Array
            img = gdal.Open(frst)
            num = img.ReadAsArray()
            
            # Two Dimension to One Dimension
            # Reshape Array
            numone = num.reshape(num.shape[0] * num.shape[1])
            
            # Get Indexes with visibility
            visnum = np.arange(numone.shape[0]).astype(np.uint32)
            visnum = visnum[numone == 1]

            # Get Indexes intervals
            visint = get_minmax_fm_seq_values(visnum)
            
            # Get rows indexes
            _visint = visint.reshape(visint.shape[0] * visint.shape[1])
            visrow = _visint / num.shape[1]
            visrow = visrow.astype(np.uint32)
            
            # Get cols indexes
            viscol = _visint - (visrow * num.shape[1])

            # Reshape
            visrow = visrow.reshape(visint.shape)
            viscol = viscol.reshape(visint.shape)

            # Split array
            irow, erow = np.vsplit(visrow.T, 1)[0]
            icol, ecol = np.vsplit(viscol.T, 1)[0]
            
            # Visibility indexes to Pandas DataFrame
            idxnum = np.full(irow.shape, row[obs_id])
            
            visdf = pd.DataFrame({
                'pntid' : idxnum, 'rowi' : irow, 'rowe' : erow,
                'coli': icol, 'cole' : ecol
            })
            
            # Pandas DF to database
            # Create Visibility table
            df_to_db(
                new_db, visdf, vis_basename,
                api='psql', colGeom=None,
                append=None if not idx else True
            )
            
            # Delete all variables
            numone  = None
            visnum  = None
            visint  = None
            _visint = None
            visrow  = None
            viscol  = None
            irow    = None
            erow    = None
            icol    = None
            ecol    = None
            idxnum  = None
            visdf   = None
            del img
            
            # Delete GRASS GIS File
            del_rst(vrst)
            
            # Delete TIFF File
            del_file(frst)
            frst = None
Example #11
def search_by_keyword(db, out_tbl, qarea, wgrp=None):
    """
    Get data using keywords
    """

    import os
    import pandas           as pd
    from multiprocessing    import Process, Manager
    from glass.cons.dsn     import search_words, tw_key
    from glass.ng.pd       import merge_df
    from glass.ng.pd.split import df_split
    from glass.g.wt.sql    import df_to_db

    # Get API Keys
    keys = tw_key()

    # Get search words
    words = search_words(group=wgrp)

    # Split search words (renamed so the imported search_words
    # function is not shadowed)
    words_lst = [words] if len(keys) == 1 else df_split(words, len(keys))

    # Search for data
    with Manager() as manager:
        DFS       = manager.list()
        LOG_LST   = manager.list()
        DROP_COLS = ["retweeted"]
    
        # Create Threads
        thrds = [Process(
            name='tk{}'.format(str(i)), target=get_tweets,
            args=(DFS, LOG_LST, words_lst[i], qarea, keys[i], DROP_COLS, i)
        ) for i in range(len(words_lst))]

        for t in thrds:
            t.start()
    
        for t in thrds:
            t.join()
    
        if not len(DFS):
            raise ValueError('No data was collected!')
    
        # Merge all dataframes
        if len(DFS) == 1:
            all_df = DFS[0]
    
        else:
            all_df = merge_df(DFS, ignIndex=True, ignoredfstype=True)
    
        all_df.rename(columns={"user" : "username"}, inplace=True)

        # Sanitize time reference
        all_df['daytime'] = pd.to_datetime(all_df.tweet_time)
        all_df.daytime = all_df.daytime.astype(str)
        all_df.daytime = all_df.daytime.str.slice(start=0, stop=-6)
        all_df.drop('tweet_time', axis=1, inplace=True)

        # Rename cols
        all_df.rename(columns={
            'text' : 'txt', 'tweet_lang' : 'tlang', 'user_id' : 'userid',
            'user_location' : 'userloc', 'place_country' : 'placecountry',
            'place_countryc' : 'placecountryc', 'place_name' : 'placename',
            'place_box' : 'placebox', 'place_id' : 'placeid',
            'followers_count' : 'followersn'
        }, inplace=True)

        # Data to new table
        df_to_db(db, all_df, out_tbl, append=True, api='psql')

        # Write log file
        log_txt = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            '{}-log.txt'.format(out_tbl)
        )
        with open(log_txt, 'w') as f:
            f.write("\n".join(LOG_LST))
    
    return log_txt
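
A hedged sketch (database, table and word-group names are hypothetical; the worker function get_tweets and the shape of the qarea search-area value are assumed to be defined by the surrounding module):

# qarea is whatever search-area object get_tweets expects (assumption)
logfile = search_by_keyword('tweets_db', 'tweets_raw', qarea, wgrp='health')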
Example #12
                os.path.join(workspace,
                             'fgrid_{}_{}.shp'.format(row[idcol], str(i + 1))),
                'area')

            clpfnet = shp_to_obj(clp_ffnet, outgeom='geom')
            clpfnet = del_cols_notin_ref(clpfnet,
                                         list(cols_b.keys()),
                                         geomCol="geom")
            clpfnet.rename(columns=cols_b, inplace=True)

            # DataFrame.append was removed in pandas 2.0; use concat
            # (pd/pandas is assumed to be imported by the enclosing module)
            main_df = pd.concat([main_df, clpfnet], ignore_index=True, sort=False)

    main_df['id'] = main_df.index
    main_df['areag'] = main_df.geom.area
    main_df = main_df[main_df.areag > 1]

    main_df.drop(['sid', 'areag'], axis=1, inplace=True)

    # Send data to database
    df_to_db(db,
             main_df,
             grid_tbl,
             append=True,
             api='psql',
             epsg=epsg,
             geomType="Polygon",
             colGeom='geom',
             dbset=setdb)

    obj_to_shp(main_df, 'geom', epsg, out_grid)
Example #13
def shp_to_psql(dbname,
                shpData,
                pgTable=None,
                api="pandas",
                mapCols=None,
                srsEpsgCode=None,
                encoding="UTF-8",
                dbset='default'):
    """
    Send Shapefile to PostgreSQL
    
    if api is equal to "pandas" - GeoPandas API will be used;
    if api is equal to "shp2pgsql" - shp2pgsql tool will be used.
    """

    import os
    from glass.pys.oss import fprop
    from glass.g.prop.prj import get_shp_epsg

    # If defined, srsEpsgCode must be an integer value
    if srsEpsgCode:
        if type(srsEpsgCode) != int:
            raise ValueError('srsEpsgCode should be an integer value')

    if api == "pandas":
        from glass.ng.rd import tbl_to_obj
        from glass.g.wt.sql import df_to_db
        from glass.g.prop.feat import get_gtype

    elif api == "shp2pgsql":
        from glass.pys import execmd
        from glass.ng.sql import psql_cmd
        from glass.pys.oss import del_file

    else:
        raise ValueError(
            'api value is not valid. options are: pandas and shp2pgsql')

    # Check if shp is folder
    if os.path.isdir(shpData):
        from glass.pys.oss import lst_ff

        shapes = lst_ff(shpData, file_format='.shp')

    else:
        from glass.pys import obj_to_lst

        shapes = obj_to_lst(shpData)

    epsgs = [get_shp_epsg(i)
             for i in shapes] if not srsEpsgCode else [srsEpsgCode]

    if None in epsgs:
        raise ValueError(
            ("Cannot obtain EPSG code. Use the srsEpsgCode parameter "
             "to specify the EPSG code of your data."))

    tables = []
    for _i in range(len(shapes)):
        # Get Table name
        tname = fprop(shapes[_i], 'fn', forceLower=True) if not pgTable else \
            pgTable[_i] if type(pgTable) == list else pgTable if len(shapes) == 1 \
            else pgTable + '_{}'.format(_i+1)

        # Import data
        if api == "pandas":
            # SHP to DataFrame
            df = tbl_to_obj(shapes[_i])

            if not mapCols:
                df.rename(columns={x: x.lower()
                                   for x in df.columns.values},
                          inplace=True)
            else:
                renameD = {
                    x : mapCols[x].lower() if x in mapCols else \
                    x.lower() for x in df.columns.values
                }
                df.rename(columns=renameD, inplace=True)

            if "geometry" in df.columns.values:
                geomCol = "geometry"

            elif "geom" in df.columns.values:
                geomCol = "geom"

            else:
                raise ValueError("No Geometry found in shp")

            # GeoDataFrame to PSQL
            df_to_db(dbname,
                     df,
                     tname,
                     append=True,
                     api='psql',
                     epsg=epsgs[_i] if not srsEpsgCode else srsEpsgCode,
                     colGeom=geomCol,
                     geomType=get_gtype(shapes[_i],
                                        name=True,
                                        py_cls=False,
                                        gisApi='ogr'))

        else:
            sql_script = os.path.join(os.path.dirname(shapes[_i]),
                                      tname + '.sql')

            cmd = ('shp2pgsql -I -s {epsg} -W {enc} '
                   '{shp} public.{name} > {out}').format(
                       epsg=epsgs[_i] if not srsEpsgCode else srsEpsgCode,
                       shp=shapes[_i],
                       name=tname,
                       out=sql_script,
                       enc=encoding)

            outcmd = execmd(cmd)

            psql_cmd(dbname, sql_script, dbcon=dbset)

            del_file(sql_script)

        tables.append(tname)

    return tables[0] if len(tables) == 1 else tables
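
A usage sketch covering both APIs (database name, paths and EPSG code are hypothetical):

# GeoPandas route: one shapefile into one table
shp_to_psql('gis_db', '/tmp/roads.shp', pgTable='roads', api='pandas')

# shp2pgsql route: every shapefile in a folder, forcing the SRS
shp_to_psql('gis_db', '/tmp/shps', api='shp2pgsql', srsEpsgCode=3763)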