def copy_fromdb_todb(conFromDb, conToDb, tables, qForTbl=None):
    """
    Send PGSQL Tables from one database to other.
    
    conFromDb / conToDb : psql connection parameters for source and target.
    tables  : table name or list of table names to copy.
    qForTbl : optional dict {table_name: query}; when a table has an entry,
              that query is used to fetch its data instead of a plain SELECT.
    """
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import cols_name
    from gasp.to.sql import df_to_db
    
    # NOTE: removed an unused "import pandas" from the original
    tables = goToList(tables)
    
    for table in tables:
        cols = cols_name(conFromDb, table)
        
        # The original duplicated the default-query branch in a nested
        # if/else; both "no qForTbl" and "table not in qForTbl" take the
        # same path, so test them together
        if qForTbl and table in qForTbl:
            tblDf = query_to_df(conFromDb, qForTbl[table], db_api='psql')
        else:
            tblDf = query_to_df(conFromDb, "SELECT {} FROM {}".format(
                ", ".join(cols), table), db_api='psql')
        
        df_to_db(conToDb, tblDf, table, api='psql')
def copy_fromdb_todb(conFromDb, conToDb, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL Tables from one database to other.
    
    api : 'pandas' (round-trip through DataFrames) or 'psql'
          (pg_dump/pg_restore scripts). Any other value falls back
          to 'pandas'.
    """
    from gasp import goToList
    
    # Unknown engine names silently fall back to the pandas path
    if api not in ('pandas', 'psql'):
        api = 'pandas'
    
    tables = goToList(tables)
    
    if api == 'pandas':
        from gasp.fm.sql import query_to_df
        from gasp.to.sql import df_to_db
        
        for table in tables:
            # Prefer a caller-supplied query for this table, if any
            if qForTbl and table in qForTbl:
                q = qForTbl[table]
            else:
                q = "SELECT * FROM {}".format(table)
            
            tblDf = query_to_df(conFromDb, q, db_api='psql')
            df_to_db(conToDb, tblDf, table, api='psql')
    
    else:
        import os
        from gasp.oss.ops import create_folder, del_folder
        from gasp.sql.mng.tbl import dump_table
        from gasp.sql.mng.tbl import restore_table
        
        # Work through SQL scripts written to a throw-away folder
        tmpFolder = create_folder(
            os.path.dirname(os.path.abspath(__file__)), randName=True)
        
        for table in tables:
            # Dump
            sqlScript = dump_table(
                conFromDb, table, os.path.join(tmpFolder, table + ".sql"))
            
            # Restore
            tblname = restore_table(conToDb, sqlScript, table)
        
        del_folder(tmpFolder)
def get_osm_feat_by_rule(nomenclature):
    """
    Return OSM features joined with their LULC class, rule name and
    rule parameters (buffer/area) for the given nomenclature.
    
    Reads identifiers from the module-level DB_SCHEMA mapping and runs
    against the module-level PROCEDURE_DB (sqlite).
    """
    from gasp.fm.sql import query_to_df
    
    # Resolve every table/column identifier from the schema mapping once
    ids = {
        'osmfeat'    : DB_SCHEMA["OSM_FEATURES"]["NAME"],
        'osmid'      : DB_SCHEMA["OSM_FEATURES"]["OSM_ID"],
        'key'        : DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        'val'        : DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"],
        'osmrel'     : DB_SCHEMA[nomenclature]["OSM_RELATION"],
        'relosmid'   : DB_SCHEMA[nomenclature]["OSM_FK"],
        'rellulccls' : DB_SCHEMA[nomenclature]["CLS_FK"],
        '_ruleID'    : DB_SCHEMA[nomenclature]["RULE_FK"],
        'rules'      : DB_SCHEMA["RULES"]["NAME"],
        'ruleID'     : DB_SCHEMA["RULES"]["RULE_ID"],
        'ruleName'   : DB_SCHEMA["RULES"]["RULE_NAME"],
        'bufferCol'  : DB_SCHEMA[nomenclature]["RULES_FIELDS"]["BUFFER"],
        'areaCol'    : DB_SCHEMA[nomenclature]["RULES_FIELDS"]["AREA"]
    }
    
    Q = (
        "SELECT jtbl.{rellulccls}, {osmfeat}.{key}, {osmfeat}.{val}, "
        "jtbl.{ruleName}, jtbl.{bufferCol}, jtbl.{areaCol} "
        "FROM {osmfeat} INNER JOIN ("
        "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
        "{rules}.{ruleID}, {rules}.{ruleName}, "
        "{osmrel}.{bufferCol}, {osmrel}.{areaCol} "
        "FROM {osmrel} "
        "INNER JOIN {rules} ON {osmrel}.{_ruleID} = {rules}.{ruleID} "
        ") AS jtbl ON {osmfeat}.{osmid} = jtbl.{relosmid}"
    ).format(**ids)
    
    return query_to_df(PROCEDURE_DB, Q, db_api='sqlite')
def bar_chart_by_query(conParam, query, xaxis_col, chart_data_fld_name,
                       series, outxls):
    """
    Execute some query for each serie in series.
    Produce a bar chart with the data returned.
    Each serie will be new bar in the chart.
    
    So, the query statment must have a white space to
    put each serie in series... E.g.:
    "SELECT fld_name FROM t WHERE series_fld={}".format(serie in series)
    """
    from gasp.fm.sql import query_to_df
    from gasp.mng.joins import combine_dfs
    from gasp.mng.xlstbx.charts import barchart_in_xls_from_df
    
    # One DataFrame per serie, with the data column renamed to the serie
    frames = []
    for serie in series:
        frame = query_to_df(conParam, query.format(serie), db_api='psql')
        frame.rename(columns={chart_data_fld_name: serie}, inplace=True)
        frames.append(frame)
    
    # Join everything on the x-axis column and draw the chart
    main_df = combine_dfs(frames[0], frames[1:], xaxis_col)
    
    return barchart_in_xls_from_df(main_df, outxls, series, xaxis_col)
def thereIsRowsSameTimeInt(row):
    """
    Set row['time_difference'] to 1 when at least two rows matching this
    row's COLS values have decimal hours closer than TIME_TOLERANCE,
    otherwise 0; return the (mutated) row.
    
    Relies on enclosing/module-scope names: COLS, COLS_TYPE, conParam,
    table, hour_decimal_field, TIME_TOLERANCE and query_to_df — TODO
    confirm against the defining scope.
    """
    whr = []
    for c in COLS:
        # Quote the value only for text columns
        if COLS_TYPE[c] == str:
            whr.append("{}='{}'".format(c, row[c]))
        else:
            whr.append("{}={}".format(c, row[c]))
    
    hourRows = query_to_df(conParam,
        "SELECT {} FROM {} WHERE {}".format(
            hour_decimal_field, table, " AND ".join(whr)),
        db_api='psql'
    )[hour_decimal_field].tolist()
    
    # BUGFIX: the original read "dif" after the loops, which is unbound
    # when fewer than two rows match; track proximity with a flag instead
    is_close = 0
    for i in range(len(hourRows)):
        for e in range(i + 1, len(hourRows)):
            if abs(hourRows[i][0] - hourRows[e][0]) < TIME_TOLERANCE:
                is_close = 1
                break
        if is_close:
            break
    
    row['time_difference'] = is_close
    
    return row
def split_table_by_col_distinct(conParam, pgtable, column):
    """
    Create a new table for each value in one column.
    
    conParam : psql connection parameters.
    pgtable  : source table name.
    column   : column whose distinct values drive the split.
    """
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import get_columns_type
    from gasp.sql.mng.qw import ntbl_by_query
    
    fields_types = get_columns_type(conParam, pgtable)
    
    # Get unique values
    # BUGFIX: the original referenced an undefined name "interest_column"
    # everywhere below (NameError); the "column" parameter is what was meant
    VALUES = query_to_df(conParam,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=column, t=pgtable),
        db_api='psql')[column].tolist()
    
    # Quote the value in the WHERE clause only for text columns
    whr = '{}=\'{}\'' if fields_types[column] == str else '{}={}'
    
    for row in VALUES:
        # NOTE(review): row[0] assumes each list entry is indexable;
        # tolist() on a single column normally yields scalars — verify
        # against query_to_df's return shape
        ntbl_by_query(conParam,
            '{}_{}'.format(pgtable, str(row[0])),
            "SELECT * FROM {} WHERE {}".format(
                pgtable, whr.format(column, str(row[0]))),
            api='psql')
def osm_features_by_rule(nomenclature, rule):
    """
    Return the OSM features associated with one OSM2LULC rule, joined
    with their LULC class and, for area/buffer rules, the rule parameter
    column. Reads identifiers from the module-level DB_SCHEMA mapping and
    queries the module-level PROCEDURE_DB (sqlite).
    """
    from gasp.fm.sql import query_to_df
    
    COLS = [
        DB_SCHEMA[nomenclature]["CLS_FK"],
        DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"]
    ]
    
    # Area rules carry the AREA threshold column, buffer rules the
    # BUFFER distance column; every other rule carries no extra column
    if rule in ('area_upper', 'area_lower'):
        extraCol = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["AREA"]
    elif rule in ('roads', 'basic_buffer'):
        extraCol = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["BUFFER"]
    else:
        extraCol = None
    
    if extraCol:
        rule_info_field = ", {}.{}".format(
            DB_SCHEMA[nomenclature]["OSM_RELATION"], extraCol)
        rule_info_field_ = ", osm_cls_rule.{}".format(extraCol)
        COLS.append(extraCol)
    else:
        rule_info_field = ""
        rule_info_field_ = ""
    
    QUERY = (
        "SELECT osm_cls_rule.{rellulccls}, {osmfeat}.{key}, "
        "{osmfeat}.{val}{rsupfield_} FROM {osmfeat} INNER JOIN ("
        "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
        "{rules}.{_rule_id}, {rules}.{rule_name}{rsupfield} "
        "FROM {osmrel} "
        "INNER JOIN {rules} ON {osmrel}.{rule_id} = {rules}.{_rule_id} "
        "WHERE {rules}.{rule_name} = '{rule_in_processing}'"
        ") AS osm_cls_rule ON {osmfeat}.{osmid} = osm_cls_rule.{relosmid}"
    ).format(
        osmfeat=DB_SCHEMA["OSM_FEATURES"]["NAME"],
        osmid=DB_SCHEMA["OSM_FEATURES"]["OSM_ID"],
        key=DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        val=DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"],
        osmrel=DB_SCHEMA[nomenclature]["OSM_RELATION"],
        relosmid=DB_SCHEMA[nomenclature]["OSM_FK"],
        rellulccls=DB_SCHEMA[nomenclature]["CLS_FK"],
        rule_id=DB_SCHEMA[nomenclature]["RULE_FK"],
        rules=DB_SCHEMA["RULES"]["NAME"],
        _rule_id=DB_SCHEMA["RULES"]["RULE_ID"],
        rule_name=DB_SCHEMA["RULES"]["RULE_NAME"],
        rule_in_processing=rule,
        rsupfield=rule_info_field,
        rsupfield_=rule_info_field_)
    
    return query_to_df(PROCEDURE_DB, QUERY, db_api='sqlite')
def rst_area(osmLink, polygonTable, UPPER=True, api='SQLITE'):
    """
    Select features with area upper than.
    
    A field with threshold is needed in the database.
    
    osmLink      : database connection (psql params or sqlite path).
    polygonTable : polygon table holding, per rule, a threshold column
                   named "t_<rule>" (e.g. t_area_upper).
    UPPER        : True -> 'area_upper' rule, compare with ">";
                   False -> 'area_lower' rule, compare with "<".
    api          : 'POSTGIS' selects the psql backend; anything else
                   uses the SQLITE backend.
    
    Returns (clsRst, timeGasto): {lulc_class_int: grass_raster_name} and
    a {step_index: (step_label, timedelta)} timing log.
    """
    import datetime
    from gasp.fm.sql import query_to_df
    
    # Pick the DB-to-GRASS importer matching the database API
    if api == 'POSTGIS':
        from gasp.to.shp.grs import psql_to_grs as db_to_grs
    else:
        from gasp.to.shp.grs import sqlite_to_shp as db_to_grs
    from gasp.to.rst import shp_to_raster
    from gasp.osm2lulc.var import GEOM_AREA
    
    RULE_COL = 'area_upper' if UPPER else 'area_lower'
    OPERATOR = " > " if UPPER else " < "
    
    # WHERE template: geometry area compared against the per-row
    # threshold column t_<rule>, restricted to one LULC class
    WHR = "{ga} {op} t_{r} AND {r}={cls_}"
    
    # Get Classes satisfying the area condition
    time_a = datetime.datetime.now().replace(microsecond=0)
    lulcCls = query_to_df(osmLink, (
        "SELECT {r} FROM {tbl} WHERE {ga} {op} t_{r} GROUP BY {r}"
    ).format(
        r=RULE_COL, tbl=polygonTable, ga=GEOM_AREA, op=OPERATOR
    ), db_api='psql' if api == 'POSTGIS' else 'sqlite')[RULE_COL].tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)
    timeGasto = {0: ('check_cls', time_b - time_a)}
    
    # Import data into GRASS and convert it to raster
    clsRst = {}
    tk = 1  # timing-log key; two entries (import + rasterize) per class
    for cls in lulcCls:
        time_x = datetime.datetime.now().replace(microsecond=0)
        grsVect = db_to_grs(
            osmLink, polygonTable, "{}_{}".format(RULE_COL, cls),
            where=WHR.format(
                op=OPERATOR, r=RULE_COL, ga=GEOM_AREA, cls_=cls
            ), notTable=True, filterByReg=True
        )
        time_y = datetime.datetime.now().replace(microsecond=0)
        timeGasto[tk] = ('import_{}'.format(cls), time_y - time_x)
        
        # NOTE(review): output raster name "rst_<rule>" is identical for
        # every class in the loop — confirm shp_to_raster handles the
        # collision (overwrite or per-mapset uniqueness)
        grsRst = shp_to_raster(
            grsVect, int(cls), None, None,
            "rst_{}".format(RULE_COL), api='grass'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)
        timeGasto[tk + 1] = ('torst_{}'.format(cls), time_z - time_y)
        
        clsRst[int(cls)] = grsRst
        tk += 2
    
    return clsRst, timeGasto
def split_table_entity_number(conP, table, entity_field, entity_number):
    """
    Split tables in several using as reference a number of entities per table
    
    If a table has 1 000 000 entities and the entity_number is 250 000,
    this method will create four tables, each one with 250 000 entities.
    250 000 entities, not rows. Don't forget that the main table may have
    more than one reference to the same entity.
    """
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import get_columns_type
    from gasp.sql.mng.qw import ntbl_by_query
    
    # NOTE: removed an unused "import pandas" from the original
    
    # Select entities in table
    entities = query_to_df(conP, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table), db_api='psql')
    
    # Split entities into groups acoording entity_number
    # BUGFIX: the original loop used "<=", which appended a trailing EMPTY
    # group (and later produced a malformed, WHERE-less query) whenever the
    # entity count was an exact multiple of entity_number (or zero)
    entityGroup = []
    lower, high = 0, entity_number
    while lower < len(entities.index):
        if high > len(entities.index):
            high = len(entities.index)
        entityGroup.append(entities.iloc[lower:high])
        lower += entity_number
        high += entity_number
    
    # For each dataframe, create a new table
    COLS_TYPE = get_columns_type(conP, table)
    
    for c, df in enumerate(entityGroup):
        # Build "field=value" terms; text entities get quoted values
        if COLS_TYPE[entity_field] != str:
            df[entity_field] = '{}='.format(
                entity_field) + df[entity_field].astype(str)
        else:
            df[entity_field] = '{}=\''.format(
                entity_field) + df[entity_field].astype(str) + '\''
        
        whr = ' OR '.join(df[entity_field])
        
        ntbl_by_query(conP, '{}_{}'.format(table, str(c)),
            "SELECT * FROM {} WHERE {}".format(table, whr), api='psql')
def get_keys():
    """
    Return available keys for this API
    """
    from gasp.fm.sql import query_to_df
    
    # Full key table, ordered by primary key
    Q = "SELECT fid, key, date, nrqst FROM google ORDER BY fid"
    
    return query_to_df(APIS_DB, Q, db_api='sqlite')
def grs_rst(osmLink, polyTbl, api='SQLITE'):
    """
    Simple selection, convert result to Raster
    """
    import datetime
    from gasp.fm.sql import query_to_df
    
    # DB-to-GRASS importer depends on the database backend
    if api == 'POSTGIS':
        from gasp.to.shp.grs import psql_to_grs as db_to_grs
    else:
        from gasp.to.shp.grs import sqlite_to_shp as db_to_grs
    from gasp.to.rst import shp_to_raster
    
    dbapi = 'psql' if api == 'POSTGIS' else 'sqlite'
    
    # Which LULC classes were selected by rule 1?
    t0 = datetime.datetime.now().replace(microsecond=0)
    lulcCls = query_to_df(osmLink, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(polyTbl), db_api=dbapi).selection.tolist()
    t1 = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', t1 - t0)}
    
    # Import each class into GRASS and rasterize it
    clsRst = {}
    for idx, cls in enumerate(lulcCls):
        tx = datetime.datetime.now().replace(microsecond=0)
        vect = db_to_grs(osmLink, polyTbl,
                         "rule1_{}".format(str(cls)),
                         where="selection = {}".format(str(cls)),
                         notTable=True, filterByReg=True)
        ty = datetime.datetime.now().replace(microsecond=0)
        
        rst = shp_to_raster(vect, int(cls), None, None,
                            "rst_rule1_{}".format(str(cls)), api='grass')
        tz = datetime.datetime.now().replace(microsecond=0)
        
        clsRst[int(cls)] = rst
        # Two timing entries per class: odd key = import, even = rasterize
        timeGasto[idx * 2 + 1] = ('import_{}'.format(cls), ty - tx)
        timeGasto[idx * 2 + 2] = ('torst_{}'.format(cls), tz - ty)
    
    return clsRst, timeGasto
def lst_views(conParam, schema='public'):
    """
    List Views in database
    """
    from gasp.fm.sql import query_to_df
    
    Q = (
        "SELECT table_name FROM information_schema.views "
        "WHERE table_schema='{}'"
    ).format(schema)
    
    return query_to_df(conParam, Q, db_api='psql').table_name.tolist()
def build12_torst(buildTbl):
    """
    Export each LULC class found in buildTbl to a shapefile and
    rasterize it.
    
    Closure: relies on enclosing-scope names osmLink, apidb, folder,
    cells, srscode, rstT, resLyr, timeGasto plus datetime/os and the
    sel_by_attr / shp_to_raster / query_to_df helpers — TODO confirm
    against the enclosing function, which is not visible here.
    """
    # Distinct LULC classes present in the buildings table
    LulcCls = query_to_df(
        osmLink, "SELECT cls FROM {} GROUP BY cls".format(buildTbl),
        db_api='psql' if apidb == 'POSTGIS' else 'sqlite').cls.tolist()
    
    for lulc_cls in LulcCls:
        time_m = datetime.datetime.now().replace(microsecond=0)
        
        # To SHP -- exporter backend depends on the database API in use
        if apidb == 'SQLITE':
            shpB = sel_by_attr(
                osmLink,
                "SELECT * FROM {} WHERE cls={}".format(
                    buildTbl, str(lulc_cls)),
                os.path.join(folder, 'nshp_build_{}.shp'.format(lulc_cls)),
                api_gis='ogr')
        else:
            shpB = sel_by_attr(
                osmLink,
                "SELECT * FROM {} WHERE cls={}".format(
                    buildTbl, str(lulc_cls)),
                os.path.join(folder, 'nshp_build_{}.shp'.format(lulc_cls)),
                api='pgsql2shp', geom_col="geometry", tableIsQuery=True)
        
        time_n = datetime.datetime.now().replace(microsecond=0)
        
        # To RST (GDAL, using the template raster/SRS from the closure)
        brst = shp_to_raster(
            shpB, None, cells, 0,
            os.path.join(folder, 'nrst_build_{}.tif'.format(lulc_cls)),
            srscode, rstT, api='gdal')
        
        time_o = datetime.datetime.now().replace(microsecond=0)
        
        resLyr[int(lulc_cls)] = [brst]
        
        # NOTE(review): timing keys int(cls) and int(cls)+1 can collide
        # between consecutive class codes — verify this is intended
        timeGasto[int(lulc_cls)] = ('to_shp_{}'.format(str(lulc_cls)),
                                    time_n - time_m)
        timeGasto[int(lulc_cls) + 1] = ('to_rst_n_{}'.format(
            str(lulc_cls)), time_o - time_n)
def get_distinct_values(lnk, pgtable, column):
    """
    Get distinct values in one column of one pgtable
    """
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    
    # One or several columns: GROUP BY on all of them
    cols = ", ".join(goToList(column))
    Q = "SELECT {col} FROM {t} GROUP BY {col};".format(col=cols, t=pgtable)
    
    return query_to_df(lnk, Q, db_api='psql').to_dict(orient="records")
def check_geomtype_in_table(conParam, table, geomCol='geom'):
    """
    Return the number of geometry types in table
    """
    from gasp.fm.sql import query_to_df
    
    # Count how many distinct geometry types the (dumped) geometries have
    Q = (
        "SELECT COUNT(*) AS row_count FROM ("
        "SELECT ST_GeometryType((ST_Dump({})).geom) AS cnt_geom "
        "FROM {} GROUP BY ST_GeometryType((ST_Dump({})).geom)"
        ") AS foo"
    ).format(geomCol, table, geomCol)
    
    d = query_to_df(conParam, Q, db_api='psql')
    
    return int(d.iloc[0].row_count)
def lst_tbl(conObj, schema='public', excludeViews=None, api='psql'):
    """
    list tables in a database
    
    API's Available:
    * psql;
    * sqlite;
    """
    if api == 'psql':
        from gasp.fm.sql import query_to_df
        
        tbls = query_to_df(conObj, (
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema='{}'"
        ).format(schema), db_api='psql')
        
        names = tbls.table_name.tolist()
        
        if excludeViews:
            # Drop names that are actually views
            viewNames = lst_views(conObj, schema=schema)
            return [t for t in names if t not in viewNames]
        
        return names
    
    elif api == 'sqlite':
        # List tables in one sqliteDB
        import sqlite3
        
        conn = sqlite3.connect(conObj)
        cursor = conn.cursor()
        
        rows = cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table';")
        names = [r[0] for r in rows]
        
        cursor.close()
        conn.close()
        
        return names
    
    else:
        raise ValueError('API {} is not available!'.format(api))
def arcg_selection(db, polTbl, fld):
    """
    Select, Dissolve and Reproject using ArcGIS
    """
    import datetime
    import os
    from gasp.mng.genze import dissolve
    from gasp.fm.sql import query_to_df
    from gasp.anls.exct import sel_by_attr
    
    # Get LULC Classes
    t0 = datetime.datetime.now().replace(microsecond=0)
    lulcCls = query_to_df(db, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL GROUP BY selection"
    ).format(polTbl), db_api='sqlite').selection.tolist()
    t1 = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', t1 - t0)}
    
    # Extract Shps from DB
    clsShp = {}
    SQL = "SELECT selection, geometry FROM {} WHERE selection={}"
    
    for i, cls in enumerate(lulcCls):
        tx = datetime.datetime.now().replace(microsecond=0)
        shp = sel_by_attr(
            db, SQL.format(polTbl, str(cls)),
            os.path.join(fld, 'rule1_{}.shp'.format(cls)), api_gis='ogr')
        ty = datetime.datetime.now().replace(microsecond=0)
        
        dShp = dissolve(
            shp, os.path.join(fld, "rul1_d_{}.shp".format(str(cls))),
            "FID", geomMultiPart=True)
        tz = datetime.datetime.now().replace(microsecond=0)
        
        clsShp[int(cls)] = dShp
        # Two timing entries per class: export then dissolve
        timeGasto[i * 2 + 1] = ("export_{}".format(cls), ty - tx)
        timeGasto[i * 2 + 2] = ("dissolve_{}".format(cls), tz - ty)
    
    return clsShp, timeGasto
def count_by_periods_with_certain_duration(conParam, PERIOD_INTERVAL, pgtable,
                                           TIME_FIELD, outTable,
                                           filterWhere=None):
    """
    Count rows in a pgtable by periods of X minutes
    
    PERIOD_INTERVAL = "01:00:00"
    """
    import pandas
    from gasp import day_to_intervals2
    # BUGFIX: query_to_df and obj_to_tbl were called below but never
    # imported (this file's convention is function-local imports)
    from gasp.fm.sql import query_to_df
    from gasp.to import obj_to_tbl
    
    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)
    
    # For each interval/period, count the number of rows
    counting = None
    for _int_ in INTERVALS:
        QUERY = (
            "SELECT COUNT(*) AS count FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr}"
        ).format(
            table=pgtable, timeCol=TIME_FIELD,
            minLower=_int_[0], minUpper=_int_[1],
            whr="" if not filterWhere else " AND ({})".format(filterWhere)
        )
        
        count = query_to_df(conParam, QUERY, db_api='psql')
        # Index the single-row result by its "HH:MM-HH:MM" period label
        count.rename(index={0: "{}-{}".format(
            _int_[0][:5], _int_[1][:5])}, inplace=True)
        
        if not isinstance(counting, pandas.DataFrame):
            counting = count.copy()
        else:
            # pandas.concat replaces the deprecated DataFrame.append
            counting = pandas.concat([counting, count])
    
    obj_to_tbl(counting, outTable)
    
    return outTable
def row_num(conObj, table, where=None, api='psql'):
    """
    Return the number of rows in Query
    
    API's Available:
    * psql;
    * sqlite;
    """
    from gasp.fm.sql import query_to_df
    
    whr = " WHERE {}".format(where) if where else ""
    
    d = query_to_df(
        conObj, "SELECT COUNT(*) AS nrows FROM {}{}".format(table, whr),
        db_api=api)
    
    return int(d.iloc[0].nrows)
def get_tbl_extent(conParam, table, geomCol):
    """
    Return extent of the geometries in one pgtable
    """
    from gasp.fm.sql import query_to_df
    
    # Dump every vertex, then take the min/max of the coordinates
    q = (
        "SELECT MIN(ST_X(pnt_geom)) AS eleft, MAX(ST_X(pnt_geom)) AS eright, "
        "MIN(ST_Y(pnt_geom)) AS bottom, MAX(ST_Y(pnt_geom)) AS top "
        "FROM ("
        "SELECT (ST_DumpPoints({geomcol})).geom AS pnt_geom "
        "FROM {tbl}"
        ") AS foo"
    ).format(tbl=table, geomcol=geomCol)
    
    ext = query_to_df(conParam, q, db_api='psql').to_dict(orient='index')[0]
    
    # [xmin, ymin, xmax, ymax]
    return [ext[k] for k in ('eleft', 'bottom', 'eright', 'top')]
def count_entity_periods_with_certain_duration(CON_PSQL, PERIOD_INTERVAL,
                                               PGTABLE, TIME_FIELD,
                                               ENTITY_FIELD, OUT_TABLE,
                                               filterWhere=None):
    """
    Count rows in a pgtable for a given period of X minutes for each
    interest entity
    
    PERIOD_INTERVAL = "01:00:00"
    """
    from gasp import day_to_intervals2
    from gasp.mng.joins import combine_dfs
    # BUGFIX: query_to_df and obj_to_tbl were called below but never
    # imported (this file's convention is function-local imports);
    # also removed an unused "import pandas"
    from gasp.fm.sql import query_to_df
    from gasp.to import obj_to_tbl
    
    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)
    
    # For each interval/period, count the number of rows by entity
    counting = []
    for _int in INTERVALS:
        Q = (
            "SELECT {entityCol}, COUNT({entityCol}) AS {cntCol} "
            "FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr} "
            "GROUP BY {entityCol}"
        ).format(
            # Count column named after the period, e.g. s08_00_e09_00
            cntCol="s{}_e{}".format(
                _int[0][:5], _int[1][:5]).replace(":", "_"),
            table=PGTABLE, timeCol=TIME_FIELD, entityCol=ENTITY_FIELD,
            minLower=_int[0], minUpper=_int[1],
            whr="" if not filterWhere else " AND ({}) ".format(filterWhere)
        )
        
        counting.append(query_to_df(CON_PSQL, Q, db_api='psql'))
    
    # One column per period, joined on the entity field
    mainDf = combine_dfs(counting[0], counting[1:], ENTITY_FIELD)
    
    obj_to_tbl(mainDf, OUT_TABLE)
    
    return OUT_TABLE
def db_to_db(conDBA, conDBB, typeDBA, typeDBB):
    """
    All tables in one Database to other database
    
    typesDB options:
    * sqlite
    * psql
    """
    import os
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.tbl import lst_tbl
    from gasp.sql.mng.db import create_db
    # BUGFIX: df_to_db was called below but never imported
    from gasp.to.sql import df_to_db
    
    # List Tables in DB A
    tbls = lst_tbl(conDBA, excludeViews=True, api=typeDBA)
    
    # Create database B
    if typeDBB == 'psql':
        con_param = {
            "HOST": conDBB["HOST"], "USER": conDBB["USER"],
            "PORT": conDBB["PORT"], "PASSWORD": conDBB["PASSWORD"]
        }
        
        if "TEMPLATE" in conDBB:
            con_param["TEMPLATE"] = conDBB["TEMPLATE"]
        
        NEW_DB = conDBB["DATABASE"]
    else:
        # sqlite: conDBB is a file path
        con_param = os.path.dirname(conDBB)
        NEW_DB = os.path.basename(conDBB)
    
    db_b = create_db(con_param, NEW_DB, overwrite=False, api=typeDBB)
    
    # Table to Database B
    for tbl in tbls:
        df = query_to_df(
            conDBA, "SELECT * FROM {}".format(tbl), db_api=typeDBA)
        
        df_to_db(conDBB, df, tbl, append=None, api=typeDBB)
def check_last_id(lnk, pk, table):
    """
    Check last ID of a given table
    
    return 0 if there is no data
    
    TODO: Do this with Pandas
    """
    from gasp.fm.sql import query_to_df
    
    # NOTE: removed an unused "from gasp.sql.c import psqlcon"
    q = "SELECT MAX({}) AS fid FROM {}".format(pk, table)
    d = query_to_df(lnk, q, db_api='psql').fid.tolist()
    
    # BUGFIX: MAX() over an empty table yields a single NULL row, so the
    # list is [None] (truthy) and the original returned None instead of
    # the documented 0 — check the value as well as the list
    if not d or d[0] is None:
        return 0
    
    return d[0]
def select_api_key():
    """
    Select the API Key to use
    
    Returns (KEY_FID, GOOGLE_API_KEY, API_REQUESTS) for the first key that
    is either unused today or still under today's request quota.
    """
    # NOTE(review): the original used datetime without a visible import —
    # presumably a module-level import; imported locally here to be safe
    import datetime
    from gasp.fm.sql import query_to_df
    
    GOOGLE_KEYS_ = query_to_df(
        APIS_DB, "SELECT fid, key, date, nrqst FROM google",
        db_api='sqlite').to_dict(orient="records")
    
    # BUGFIX: to_dict(orient="records") returns a LIST of row dicts; the
    # original indexed the list itself with string keys (TypeError) and
    # built N copies of the same tuple — index each record instead
    GOOGLE_KEYS = [
        (rec['fid'], rec['key'], rec['date'], rec['nrqst'])
        for rec in GOOGLE_KEYS_
    ]
    
    # Today's date as zero-padded YYYY-MM-DD
    DATE = datetime.date.today()
    DAYTIME = '{}-{}-{}'.format(
        str(DATE.year),
        str(DATE.month) if len(str(DATE.month)) == 2 else
            "0" + str(DATE.month),
        str(DATE.day) if len(str(DATE.day)) == 2 else "0" + str(DATE.day))
    
    # BUGFIX: these were referenced before assignment when every key was
    # exhausted — initialise them explicitly
    KEY_FID, GOOGLE_API_KEY, API_REQUESTS = None, None, None
    
    for k in range(len(GOOGLE_KEYS)):
        if DAYTIME != str(GOOGLE_KEYS[k][2]):
            # Key not used today: fresh quota
            GOOGLE_API_KEY = GOOGLE_KEYS[k][1]
            API_REQUESTS = 0
            KEY_FID = GOOGLE_KEYS[k][0]
        else:
            API_REQUESTS = int(GOOGLE_KEYS[k][3])
            
            # Skip keys already at/over the daily limit
            if API_REQUESTS >= 2490:
                continue
            else:
                GOOGLE_API_KEY = GOOGLE_KEYS[k][1]
                KEY_FID = GOOGLE_KEYS[k][0]
        
        if GOOGLE_API_KEY:
            break
    
    return KEY_FID, GOOGLE_API_KEY, API_REQUESTS
def num_selection(osmcon, polyTbl, folder, cellsize, srscode, rstTemplate,
                  api='SQLITE'):
    """
    Select and Convert to Raster
    
    For every non-null "selection" class in polyTbl, export the matching
    polygons to a shapefile and rasterize them (one worker thread per
    class).
    
    osmcon      : database connection (sqlite path or psql params).
    polyTbl     : polygon table with a "selection" LULC-class column.
    folder      : output folder for intermediate shp/tif files.
    cellsize    : raster cell size.
    srscode     : EPSG code for the outputs.
    rstTemplate : template raster defining the output grid.
    api         : 'SQLITE' or 'POSTGIS' (selects the export backend).
    
    Returns (clsRst, timeGasto): {class_int: raster_path} and a timing
    log {key: (label, timedelta)}.
    """
    import datetime;     import os
    from threading import Thread
    if api == 'SQLITE':
        from gasp.anls.exct import sel_by_attr
    else:
        from gasp.to.shp import psql_to_shp as sel_by_attr
    from gasp.fm.sql import query_to_df
    from gasp.to.rst import shp_to_raster
    
    # Get classes in data
    time_a = datetime.datetime.now().replace(microsecond=0)
    classes = query_to_df(osmcon, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(
        polyTbl
    ), db_api='psql' if api == 'POSTGIS' else 'sqlite').selection.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0 : ('check_cls', time_b - time_a)}
    
    # Select and Export
    # NOTE: clsRst/timeGasto are shared by all worker threads, but each
    # thread writes distinct keys (its class / its cnt offsets)
    clsRst = {}
    SQL_Q = "SELECT {lc} AS cls, geometry FROM {tbl} WHERE selection={lc}"
    
    def FilterAndExport(CLS, cnt):
        # Export one class to shapefile, then rasterize it
        time_x = datetime.datetime.now().replace(microsecond=0)
        
        if api == 'SQLITE':
            shp = sel_by_attr(
                osmcon, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api_gis='ogr'
            )
        else:
            shp = sel_by_attr(
                osmcon, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api='pgsql2shp', geom_col="geometry", tableIsQuery=True
            )
        time_y = datetime.datetime.now().replace(microsecond=0)
        
        rstCls = shp_to_raster(
            shp, None, cellsize, 0,
            os.path.join(folder, 'sel_{}.tif'.format(str(CLS))),
            epsg=srscode, rst_template=rstTemplate, api='gdal'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)
        
        clsRst[int(CLS)] = rstCls
        timeGasto[cnt + 1] = ('toshp_{}'.format(str(CLS)), time_y - time_x)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), time_z - time_y)
    
    # One thread per class; cnt offsets keep timing keys disjoint
    trs = []
    for i in range(len(classes)):
        trs.append(Thread(
            name="lll{}".format(str(classes[i])),
            target=FilterAndExport, args=(classes[i], (i+1) * 10)
        ))
    
    for t in trs: t.start()
    for t in trs: t.join()
    
    return clsRst, timeGasto
def get_not_used_tags(OSM_FILE, OUT_TBL):
    """
    Use a file OSM to detect tags not considered in the
    OSM2LULC procedure
    
    Writes a spreadsheet (OUT_TBL) with the unknown key/value/geom
    triples and, per geometry type, a shapefile with the matching
    features next to OUT_TBL.
    
    NOTE(review): this function uses the Python 2 "unicode" builtin —
    it will not run unmodified under Python 3.
    """
    import os
    from gasp.anls.exct import sel_by_attr
    from gasp.fm.sql import query_to_df
    from gasp.oss import get_filename
    from gasp.osm2lulc.utils import osm_to_sqdb
    from gasp.to import obj_to_tbl
    
    # Reference database holding the key/value pairs OSM2LULC knows about
    OSM_TAG_MAP = {
        "DB": os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'osmtolulc.sqlite'),
        "OSM_FEAT": "osm_features",
        "KEY_COL": "key",
        "VALUE_COL": "value",
        "GEOM_COL": "geom"
    }
    
    WORKSPACE = os.path.dirname(OUT_TBL)
    
    sqdb = osm_to_sqdb(OSM_FILE, os.path.join(
        WORKSPACE, get_filename(OSM_FILE) + '.sqlite'))
    
    # Get Features we are considering
    ourOSMFeatures = query_to_df(OSM_TAG_MAP["DB"], (
        "SELECT {key} AS key_y, {value} AS value_y, {geom} AS geom_y "
        "FROM {tbl}"
    ).format(
        key=OSM_TAG_MAP["KEY_COL"], value=OSM_TAG_MAP["VALUE_COL"],
        geom=OSM_TAG_MAP["GEOM_COL"], tbl=OSM_TAG_MAP["OSM_FEAT"]
    ), db_api='sqlite')
    
    # Get Features in File
    TABLES_TAGS = {
        'points': ['highway', 'man_made', 'building'],
        'lines': ['highway', 'waterway', 'aerialway', 'barrier',
                  'man_made', 'railway'],
        'multipolygons': [
            'aeroway', 'amenity', 'barrier', 'building', 'craft',
            'historic', 'land_area', 'landuse', 'leisure', 'man_made',
            'military', 'natural', 'office', 'place', 'shop', 'sport',
            'tourism', 'waterway', 'power', 'railway', 'healthcare',
            'highway'
        ]
    }
    
    # One UNION ALL query per table collecting every non-null tag value
    Qs = [
        " UNION ALL ".join([(
            "SELECT '{keycol}' AS key, {keycol} AS value, "
            "'{geomtype}' AS geom FROM {tbl} WHERE "
            "{keycol} IS NOT NULL"
        ).format(
            keycol=c,
            geomtype='Point' if table == 'points' else 'Line' \
                if table == 'lines' else 'Polygon',
            tbl=table
        ) for c in TABLES_TAGS[table]]) for table in TABLES_TAGS
    ]
    
    fileOSMFeatures = query_to_df(sqdb, (
        "SELECT key, value, geom FROM ({}) AS foo "
        "GROUP BY key, value, geom"
    ).format(" UNION ALL ".join(Qs)), db_api='sqlite')
    
    _fileOSMFeatures = fileOSMFeatures.merge(
        ourOSMFeatures, how='outer',
        left_on=["key", "value", "geom"],
        right_on=["key_y", "value_y", "geom_y"])
    
    # Select OSM Features of file without correspondence
    _fileOSMFeatures["isnew"] = _fileOSMFeatures.key_y.fillna(value='nenhum')
    newTags = _fileOSMFeatures[_fileOSMFeatures.isnew == 'nenhum']
    
    # Escape single quotes for SQL and build per-row WHERE terms
    newTags["value"] = newTags.value.str.replace("'", "''")
    newTags["whr"] = newTags.key.str.encode('utf-8').astype(str) + "='" + \
        newTags.value.str.encode('utf-8').astype(str) + "'"
    
    # Export OUT_TBL with tags not being used
    obj_to_tbl(newTags, OUT_TBL, sheetsName="new_tags", sanitizeUtf8=True)
    
    # Export tags not being used to new shapefile
    def to_regular_str(row):
        # Fallback sanitizer when the unicode WHERE clause fails
        from gasp import unicode_to_str
        row["whr_san"] = unicode_to_str(row.whr)
        return row
    
    for t in TABLES_TAGS:
        if t == 'points':
            filterDf = newTags[newTags.geom == 'Point']
        elif t == 'lines':
            filterDf = newTags[newTags.geom == 'Line']
        elif t == 'multipolygons':
            filterDf = newTags[newTags.geom == 'Polygon']
        
        # BUGFIX: dropped a stray third argument ('utf-8') that the
        # original passed to .format()
        Q = unicode("SELECT * FROM {} WHERE {}", 'utf-8').format(
            unicode(t, 'utf-8'), filterDf.whr.str.cat(sep=" OR "))
        
        try:
            # BUGFIX: original referenced the misspelled name "WORKPSACE"
            # here, guaranteeing a NameError and forcing the except path
            shp = sel_by_attr(
                sqdb, Q, os.path.join(WORKSPACE, t + '.shp'), api_gis='ogr')
        except Exception:
            __filterDf = filterDf.apply(lambda x: to_regular_str(x), axis=1)
            
            _Q = "SELECT * FROM {} WHERE {}".format(
                t, __filterDf.whr_san.str.cat(sep=" OR "))
            
            shp = sel_by_attr(sqdb, _Q, os.path.join(WORKSPACE, t + '.shp'))
    
    return OUT_TBL
def ID_rows_with_temporal_proximity_by_entities(conParam, table, entity_field,
                                                day_field, hour_field,
                                                hour_decimal_field,
                                                time_tolerance, outXlsPath):
    """
    Retrieve rows from one pgtable with some temporal proximity
    
    Table structure should be
    entity | day | hour | hour_decimal
    
    For a time_tolerance (minutes), the output table flags the
    entity/day/hour groups that contain rows with a temporal difference
    inside/bellow that time tolerance.
    
    entity_field could be more than one field.
    
    This method only identifies if one entity, for one day, has rows very
    close of each others, in terms of time. Not a good strategy for large
    tables — for large tables, SQL based methods are needed.
    """
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import get_columns_type
    from gasp.to import obj_to_tbl
    
    # NOTE: removed an unused "import pandas" from the original
    entity_field = goToList(entity_field)
    COLS = entity_field + [day_field, hour_field]
    COLS_TYPE = get_columns_type(conParam, table)
    
    # TIME TOLERANCE IN HOURS
    TIME_TOLERANCE = time_tolerance / 60.0
    
    def thereIsRowsSameTimeInt(row):
        # Flag the row when two of its group's decimal hours are closer
        # than TIME_TOLERANCE
        whr = []
        for c in COLS:
            if COLS_TYPE[c] == str:
                whr.append("{}='{}'".format(c, row[c]))
            else:
                whr.append("{}={}".format(c, row[c]))
        
        hourRows = query_to_df(conParam,
            "SELECT {} FROM {} WHERE {}".format(
                hour_decimal_field, table, " AND ".join(whr)),
            db_api='psql'
        )[hour_decimal_field].tolist()
        
        # BUGFIX: the original read "dif" after the loops, which is
        # unbound when fewer than two rows match; use a flag instead
        is_close = 0
        for i in range(len(hourRows)):
            for e in range(i + 1, len(hourRows)):
                if abs(hourRows[i][0] - hourRows[e][0]) < TIME_TOLERANCE:
                    is_close = 1
                    break
            if is_close:
                break
        
        row['time_difference'] = is_close
        
        return row
    
    # Count entity occourrences for one day and hour
    countsByEntityTime = query_to_df(conParam, (
        "SELECT {scols}, conta FROM "
        "(SELECT {scols}, COUNT({ent}) AS conta FROM {tbl} "
        "GROUP BY {scols}) AS foo WHERE conta > 1"
    ).format(
        scols=', '.join(COLS), ent=entity_field[0], tbl=table
    ), db_api='psql')
    
    # For each row in the last count, When count is > 1
    # Check time difference between rows for one day and hour
    countsByEntityTime = countsByEntityTime.apply(
        lambda x: thereIsRowsSameTimeInt(x), axis=1)
    
    obj_to_tbl(countsByEntityTime, outXlsPath)
    
    return outXlsPath
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, conPARAM, DB,
                     SRS_CODE, GIS_SOFTWARE="GRASS",
                     GRASS_REGION_TEMPLATE=None):
    """
    Check differences between pairs of Feature Classes.

    Given several Feature Classes (FC), each holding an attribute of
    interest, consider every possible pair of these FC and compare the
    differences in the distribution of that attribute's values within
    each pair.  For every pair a union layer is built, the agreement
    area/percentage is computed and a confusion matrix is produced; all
    results are exported to a single spreadsheet (REPORT).

    SHAPES_TO_COMPARE - dict {path_to_fc_or_raster: attribute_column}
    OUT_FOLDER        - output folder (must NOT exist; it is created here)
    REPORT            - path of the output XLS report
    conPARAM          - psql connection parameters (dict; "DATABASE" is set here)
    DB                - name of the PostgreSQL database to be created
    SRS_CODE          - EPSG code of the data
    GIS_SOFTWARE      - "GRASS" or "ARCGIS"
    GRASS_REGION_TEMPLATE - raster/region template, mandatory for GRASS

    * Dependencies:
    - ArcGIS;
    - GRASS;
    - PostgreSQL;
    - PostGIS.

    * GIS_SOFTWARE Options:
    - ARCGIS;
    - GRASS.

    NOTE(review): this function is written for Python 2 (print statements,
    list-returning dict.keys()).  The names `union`, `optimized_union_anls`
    and `db_to_xls` are not imported inside this function — presumably they
    are module-level imports outside this chunk; verify.
    """

    import datetime
    import os
    import pandas
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.tbl import tbls_to_tbl
    from gasp.sql.mng.geom import fix_geom, check_geomtype_in_table
    from gasp.sql.mng.geom import select_main_geom_type
    from gasp.sql.mng.qw import ntbl_by_query
    from gasp.prop.ff import check_isRaster
    from gasp.oss import get_filename
    from gasp.sql.mng.db import create_db
    from gasp.to.sql import shp_to_psql, df_to_db
    from gasp.to.shp import rst_to_polyg
    from gasp.to.shp import shp_to_shp, psql_to_shp
    from gasp.to import db_to_tbl

    # Check if folder exists; if not create it (refuse to overwrite
    # an existing folder — results would be mixed with old files)
    if not os.path.exists(OUT_FOLDER):
        from gasp.oss.ops import create_folder
        create_folder(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    # Start GRASS GIS Session if GIS_SOFTWARE == GRASS
    if GIS_SOFTWARE == "GRASS":
        if not GRASS_REGION_TEMPLATE:
            raise ValueError(
                'To use GRASS GIS you need to specify GRASS_REGION_TEMPLATE')

        from gasp.session import run_grass

        gbase = run_grass(OUT_FOLDER, grassBIN='grass76', location='shpdif',
                          srs=GRASS_REGION_TEMPLATE)

        import grass.script as grass
        import grass.script.setup as gsetup

        gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

        # GRASS-specific helpers are only importable after session init
        from gasp.mng.grstbl import rename_col
        from gasp.to.shp.grs import shp_to_grs, grs_to_shp
        from gasp.to.rst import rst_to_grs
        from gasp.mng.fld import rename_column

    # Convert to SHAPE if file is Raster
    # Import to GRASS GIS if GIS SOFTWARE == GRASS
    # Each input ends up in _SHP_TO_COMPARE mapped to the column holding
    # the value to be compared ("lulc_<i>" / "gridcode")
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            if GIS_SOFTWARE == "ARCGIS":
                d = rst_to_polyg(s, os.path.join(
                    os.path.dirname(s), get_filename(s) + '.shp'),
                    gisApi='arcpy')

                _SHP_TO_COMPARE[d] = "gridcode"

            elif GIS_SOFTWARE == "GRASS":
                # To GRASS
                rstName = get_filename(s)
                inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
                # To Raster
                d = rst_to_polyg(inRst, rstName,
                                 rstColumn="lulc_{}".format(i),
                                 gisApi="grasscmd")

                # Export Shapefile
                shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'),
                                 "area")

                _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            if GIS_SOFTWARE == "ARCGIS":
                _SHP_TO_COMPARE[s] = SHAPES_TO_COMPARE[s]

            elif GIS_SOFTWARE == "GRASS":
                # To GRASS
                grsV = shp_to_grs(s, get_filename(s), asCMD=True)

                # Change name of column with comparing value
                rename_col(grsV, SHAPES_TO_COMPARE[s], "lulc_{}".format(i),
                           as_cmd=True)

                # Export
                shp = grs_to_shp(grsV, os.path.join(
                    OUT_FOLDER, grsV + '_rn.shp'), "area")

                _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE

    if GIS_SOFTWARE == "ARCGIS":
        from gasp.cpu.arcg.mng.fld import calc_fld
        from gasp.cpu.arcg.mng.wspace import create_geodb
        from gasp.mng.gen import copy_feat

        # Sanitize data and Add new field
        __SHAPES_TO_COMPARE = {}
        i = 0

        # Create GeoDatabase
        geodb = create_geodb(OUT_FOLDER, 'geo_sanitize')

        """ Sanitize Data """
        for k in SHAPES_TO_COMPARE:
            # Send data to GeoDatabase only to sanitize
            newFc = shp_to_shp(k, os.path.join(geodb, get_filename(k)),
                               gisApi='arcpy')

            # Create a copy to change
            newShp = copy_feat(
                newFc, os.path.join(OUT_FOLDER, os.path.basename(k)),
                gisApi='arcpy')

            # Sanitize field name with interest data
            NEW_FLD = "lulc_{}".format(i)
            calc_fld(newShp, NEW_FLD, "[{}]".format(SHAPES_TO_COMPARE[k]),
                     isNewField={
                         "TYPE": "INTEGER", "LENGTH": 5, "PRECISION": ""})

            __SHAPES_TO_COMPARE[newShp] = NEW_FLD

            i += 1

    else:
        __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database (and record its name in the connection parameters)
    conPARAM["DATABASE"] = create_db(conPARAM, DB)

    """ Union SHAPEs """

    UNION_SHAPE = {}
    FIX_GEOM = {}

    def fix_geometry(shp):
        # Repair invalid geometries by a PostGIS round-trip; used when the
        # ArcGIS union fails on the raw shapefile
        # Send data to PostgreSQL
        nt = shp_to_psql(conPARAM, shp, SRS_CODE, api='shp2pgsql')

        # Fix data
        corr_tbl = fix_geom(conPARAM, nt, "geom", "corr_{}".format(nt),
                            colsSelect=['gid', __SHAPES_TO_COMPARE[shp]])

        # Check if we have multiple geometries
        geomN = check_geomtype_in_table(conPARAM, corr_tbl)

        if geomN > 1:
            corr_tbl = select_main_geom_type(conPARAM, corr_tbl,
                                             "corr2_{}".format(nt))

        # Export data again
        newShp = psql_to_shp(conPARAM, corr_tbl,
                             os.path.join(OUT_FOLDER, corr_tbl + '.shp'),
                             api='pgsql2shp', geom_col='geom')

        return newShp

    # NOTE(review): Python 2 — keys() returns a list, which is indexed below
    SHPS = __SHAPES_TO_COMPARE.keys()
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            if GIS_SOFTWARE == 'ARCGIS':
                # Try the union thing
                unShp = union(SHPS[i], SHPS[e], os.path.join(
                    OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                    api_gis="arcpy")

                # See if the union went all right
                if not os.path.exists(unShp):
                    # Union went not well
                    # See if geometry was already fixed
                    if SHPS[i] not in FIX_GEOM:
                        # Fix SHPS[i] geometry
                        FIX_GEOM[SHPS[i]] = fix_geometry(SHPS[i])

                    if SHPS[e] not in FIX_GEOM:
                        FIX_GEOM[SHPS[e]] = fix_geometry(SHPS[e])

                    # Run Union again
                    unShp = union(FIX_GEOM[SHPS[i]], FIX_GEOM[SHPS[e]],
                                  os.path.join(
                                      OUT_FOLDER,
                                      "un_{}_{}_f.shp".format(i, e)),
                                  api_gis="arcpy")

            elif GIS_SOFTWARE == "GRASS":
                # Optimized Union
                print "Union between {} and {}".format(SHPS[i], SHPS[e])
                time_a = datetime.datetime.now().replace(microsecond=0)
                __unShp = optimized_union_anls(
                    SHPS[i], SHPS[e],
                    os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                    GRASS_REGION_TEMPLATE, SRS_CODE,
                    os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                    multiProcess=True)
                time_b = datetime.datetime.now().replace(microsecond=0)
                print time_b - time_a

                # Rename cols (drop the a_/b_ prefixes the union added)
                unShp = rename_column(
                    __unShp, {
                        "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                            __SHAPES_TO_COMPARE[SHPS[i]],
                        "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                            __SHAPES_TO_COMPARE[SHPS[e]]
                    },
                    os.path.join(OUT_FOLDER, "un_{}_{}_rn.shp".format(i, e)))

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data one more time to postgresql
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(conPARAM, UNION_SHAPE[uShp], SRS_CODE,
                                api='shp2pgsql')

        # Produce table with % of area equal in both maps
        areaMapTbl = ntbl_by_query(
            conPARAM, "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
             "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
             "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
             "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
             "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
             "CASE "
             "WHEN {map1_cls} = {map2_cls} "
             "THEN 1 ELSE 0 "
             "END AS isthesame, total_area FROM {tbl}, ("
             "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
             ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                lulc_1=get_filename(uShp[0]),
                lulc_2=get_filename(uShp[1]),
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                tbl=union_tbl), api='psql')

        # Produce confusion matrix for the pair in comparison
        # First: the distinct classes present in either map (crosstab
        # needs the output columns to be declared up-front)
        lulcCls = query_to_df(
            conPARAM,
            ("SELECT fcol FROM ("
             "SELECT CAST({map1_cls} AS text) AS fcol FROM {tbl} "
             "GROUP BY {map1_cls} "
             "UNION ALL SELECT CAST({map2_cls} AS text) FROM {tbl} "
             "GROUP BY {map2_cls}"
             ") AS foo GROUP BY fcol ORDER BY fcol").format(
                tbl=union_tbl,
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]]),
            db_api='psql').fcol.tolist()

        matrixTbl = ntbl_by_query(
            conPARAM, "{}_matrix".format(union_tbl),
            ("SELECT * FROM crosstab('"
             "SELECT CASE "
             "WHEN foo.{map1_cls} IS NOT NULL "
             "THEN foo.{map1_cls} ELSE jtbl.flyr "
             "END AS lulc1_cls, CASE "
             "WHEN foo.{map2_cls} IS NOT NULL "
             "THEN foo.{map2_cls} ELSE jtbl.slyr "
             "END AS lulc2_cls, CASE "
             "WHEN foo.garea IS NOT NULL "
             "THEN round(CAST(foo.garea / 1000000 AS numeric)"
             ", 3) ELSE 0 "
             "END AS garea FROM ("
             "SELECT CAST({map1_cls} AS text) AS {map1_cls}, "
             "CAST({map2_cls} AS text) AS {map2_cls}, "
             "SUM(ST_Area(geom)) AS garea "
             "FROM {tbl} GROUP BY {map1_cls}, {map2_cls}"
             ") AS foo FULL JOIN ("
             "SELECT f.flyr, s.slyr FROM ("
             "SELECT CAST({map1_cls} AS text) AS flyr "
             "FROM {tbl} GROUP BY {map1_cls}"
             ") AS f, ("
             "SELECT CAST({map2_cls} AS text) AS slyr "
             "FROM {tbl} GROUP BY {map2_cls}"
             ") AS s"
             ") AS jtbl "
             "ON foo.{map1_cls} = jtbl.flyr AND "
             "foo.{map2_cls} = jtbl.slyr "
             "ORDER BY 1,2"
             "') AS ct("
             "lulc_cls text, {crossCols}"
             ")").format(crossCols=", ".join(
                ["cls_{} numeric".format(c) for c in lulcCls]),
                tbl=union_tbl,
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]]), api='psql')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(conPARAM,
                              [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL],
                              'total_table')

    # Create table with % of agreement between each pair of maps
    mapsNames = query_to_df(
        conPARAM,
        ("SELECT lulc FROM ("
         "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
         ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    Q = ("SELECT * FROM crosstab('"
         "SELECT CASE "
         "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
         "END AS lulc_1, CASE "
         "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
         "END AS lulc_2, CASE "
         "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
         "END AS agree_percentage FROM ("
         "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
         "SELECT lulc_1, lulc_2, {valCol} FROM ("
         "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
         "FROM {tbl}"
         ") AS tst"
         ") AS foo FULL JOIN ("
         "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
         "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_1, ("
         "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
         ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
         "ORDER BY lulc_1, lulc_2"
         "') AS ct("
         "lulc_map text, {crossCols}"
         ")")

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    # First pivot field fills TOTAL_AGREE_TABLE, the second fills
    # TOTAL_AREA_TABLE (relies on the first iteration setting the former)
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = ntbl_by_query(
                conPARAM, "agreement_table",
                Q.format(tbl=total_table, valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_)
                             for map_ in mapsNames])),
                api='psql')

        else:
            TOTAL_AREA_TABLE = ntbl_by_query(
                conPARAM, "area_table",
                Q.format(tbl=total_table, valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_)
                             for map_ in mapsNames])),
                api='psql')

    # Union Mapping (which pair of inputs produced which union shape)
    UNION_MAPPING = pandas.DataFrame(
        [[
            get_filename(k[0]), get_filename(k[1]),
            get_filename(UNION_SHAPE[k])
        ] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'
                 ]) if GIS_SOFTWARE == "ARCGIS" else pandas.DataFrame(
        [[k[0], k[1], get_filename(UNION_SHAPE[k])] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(conPARAM, UNION_MAPPING, 'union_map',
                             api='psql')

    # Export Results
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    # Sheet names are truncated to 15 chars per input to respect XLS
    # sheet-name length limits
    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(get_filename(x[0])[:15], get_filename(x[1])[:15])
        for x in SYNTH_TBL
    ]

    db_to_xls(conPARAM, ["SELECT * FROM {}".format(x) for x in TABLES],
              REPORT, sheetsNames=SHEETS, dbAPI='psql')

    return REPORT
def rst_pnt_to_build(osmLink, pntTable, polyTable, api_db='SQLITE'):
    """
    Replace buildings with tag yes using the info in the Points Layer.

    Only used for URBAN ATLAS and CORINE LAND COVER.

    osmLink   - connection to the OSM database
    pntTable  - points table with building classes
    polyTable - polygons table with buildings
    api_db    - 'POSTGIS' or 'SQLITE'

    Returns (resLayers, timeGasto): a dict mapping each LULC class to the
    raster layers produced for it, and a dict with the elapsed time of
    every step.
    """

    import datetime
    from gasp.sql.mng.tbl import row_num as cnt_row
    from gasp.fm.sql import query_to_df
    if api_db == 'POSTGIS':
        from gasp.to.shp.grs import psql_to_grs as db_to_shp
    else:
        from gasp.to.shp.grs import sqlite_to_shp as db_to_shp
    from gasp.sql.anls.ovlay import sgbd_get_feat_within
    from gasp.sql.anls.ovlay import sgbd_get_feat_not_within
    from gasp.to.rst import shp_to_raster

    def _now():
        # Timestamp with microseconds stripped, for step timing
        return datetime.datetime.now().replace(microsecond=0)

    # Backend selectors derived once from api_db
    ovlAPI = "OGR_SPATIALITE" if api_db != "POSTGIS" else api_db
    sqlAPI = 'psql' if api_db == 'POSTGIS' else 'sqlite'

    # Sub-queries shared by both overlay operations
    pntSel = ("(SELECT buildings AS pnt_build, geometry AS pnt_geom "
              "FROM {} WHERE buildings IS NOT NULL)").format(pntTable)
    polySel = ("(SELECT buildings AS poly_build, geometry AS poly_geom "
               "FROM {} WHERE buildings IS NOT NULL)").format(polyTable)

    tStart = _now()

    # Buildings that contain a classified point get that point's class
    new_build = sgbd_get_feat_within(
        osmLink, pntSel, "pnt_geom", polySel, "poly_geom",
        "new_buildings", inTblCols="pnt_build AS cls",
        withinCols="poly_geom AS geometry", outTblIsFile=None,
        apiToUse=ovlAPI)

    tWithin = _now()

    # Buildings with no classified point keep the generic class 11
    yes_build = sgbd_get_feat_not_within(
        osmLink, polySel, "poly_geom", pntSel, "pnt_geom",
        "yes_builds", inTblCols="poly_geom AS geometry, 11 AS cls",
        outTblIsFile=None, apiToUse=ovlAPI)

    tNotWithin = _now()

    resLayers = {}

    N11 = cnt_row(osmLink, yes_build, api=sqlAPI)

    tCount = _now()

    if N11:
        # Class-11 buildings: bring into GRASS GIS and rasterize
        grsBuild11 = db_to_shp(osmLink, yes_build, "yes_builds",
                               notTable=True, filterByReg=True)

        tImp11 = _now()

        rstBuild11 = shp_to_raster(grsBuild11, 11, None, None,
                                   "rst_builds11", api="grass")

        tRst11 = _now()

        resLayers[11] = [rstBuild11]
    else:
        tImp11, tRst11 = None, None

    # Distinct classes assigned via the points layer
    lulcCls = query_to_df(
        osmLink, "SELECT cls FROM {} GROUP BY cls".format(new_build),
        db_api=sqlAPI).cls.tolist()

    timeGasto = {
        0: ('intersect', tWithin - tStart),
        1: ('disjoint', tNotWithin - tWithin),
        2: ('count_b11', tCount - tNotWithin),
        3: None if not tImp11 else ('import_b11', tImp11 - tCount),
        4: None if not tRst11 else ('torst_b11', tRst11 - tImp11),
    }

    tk = 5
    for cls in lulcCls:
        tX = _now()

        # Import one class at a time and rasterize it
        shp = db_to_shp(
            osmLink, new_build, "nbuild_{}".format(str(cls)),
            "cls = {}".format(cls), notTable=True, filterByReg=True)

        tY = _now()

        rstb = shp_to_raster(shp, int(cls), None, None,
                             "rst_nbuild_{}".format(str(cls)), api="grass")

        tZ = _now()

        # Class 11 may already hold the "yes" buildings raster
        if int(cls) == 11 and int(cls) in resLayers:
            resLayers[int(cls)].append(rstb)
        else:
            resLayers[int(cls)] = [rstb]

        timeGasto[tk] = ('import_bn{}'.format(cls), tY - tX)
        timeGasto[tk + 1] = ('torst_bn{}'.format(cls), tZ - tY)

        tk += 2

    return resLayers, timeGasto
def db_to_tbl(conDB, tables, outTbl, txtDelimiter=None, dbAPI='psql',
              outTblF=None, sheetsNames=None):
    """
    Database data to File table.

    API's available:
    * psql;
    * sqlite;

    conDB        - database connection parameters
    tables       - table name, SELECT query, list of either, or 'ALL'
    outTbl       - output file path, or output folder when exporting
                   several tables
    txtDelimiter - delimiter for text-based outputs
    dbAPI        - 'psql' or 'sqlite'
    outTblF      - output file format (e.g. 'xlsx' or '.xlsx'); required
                   when outTbl is a folder
    sheetsNames  - sheet name(s) for spreadsheet outputs

    Returns outTbl.
    """

    import os
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    # FIX: obj_to_tbl is called below but was never imported in this
    # function, raising NameError at export time
    from gasp.to import obj_to_tbl

    if tables == 'ALL':
        from gasp.sql.mng.tbl import lst_tbl
        tables = lst_tbl(conDB, schema='public', excludeViews=True,
                         api=dbAPI)
    else:
        tables = goToList(tables)

    sheetsNames = goToList(sheetsNames)

    # Normalize the format so it always carries a leading dot
    outTblF = None if not outTblF else outTblF \
        if outTblF[0] == '.' else '.' + outTblF

    if len(tables) > 1:
        if not os.path.isdir(outTbl) or not outTblF:
            raise ValueError((
                "When tables has more than one table, "
                "outTbl must be dir and outTblF must be specified"
            ))

    elif len(tables) == 1:
        if os.path.isdir(outTbl) and outTblF:
            outTbl = os.path.join(outTbl, tables[0] + outTblF)

        # FIX: original condition was "os.path.isdir(outTbl) and not outTbl",
        # which can never be True for a non-empty path string, so this
        # error branch was unreachable; the intent is clearly to require
        # outTblF when outTbl is a directory
        elif os.path.isdir(outTbl) and not outTblF:
            raise ValueError((
                'We find only a table to export and outTbl is a dir. '
                'Please give a path to a file or specify the table format '
                'using outTblF format'
            ))

        else:
            outTbl = outTbl

    else:
        raise ValueError(
            "tables value is not valid"
        )

    # Entries that are already SELECT statements are run as-is; plain
    # table names are wrapped in a SELECT *
    DFS = [query_to_df(
        conDB, t if t.startswith(
            "SELECT") else "SELECT * FROM {}".format(t), db_api=dbAPI
    ) for t in tables]

    # FIX: original used os.path.isfile(outTbl), which is False when the
    # output file does not exist yet, so the spreadsheet branch was only
    # taken when overwriting an existing file; checking "not a directory"
    # matches the intent (outTbl is a file path at this point for a
    # single-table export)
    if not os.path.isdir(outTbl):
        from gasp.oss import get_fileformat

        ff = get_fileformat(outTbl)

        if ff == '.xlsx' or ff == '.xls':
            obj_to_tbl(DFS, outTbl, sheetsName=sheetsNames,
                       sanitizeUtf8=None)

            return outTbl

    for i in range(len(DFS)):
        obj_to_tbl(
            DFS[i],
            outTbl if len(DFS) == 1 else os.path.join(
                outTbl, tables[i] + outTblF),
            delimiter=txtDelimiter,
            sheetsName=sheetsNames
        )

    return outTbl