def tbl_fromdb_todb(from_db, to_db, tables, qForTbl=None, api='pandas'):
    """
    Copy PostgreSQL tables from one database to another.

    tables may be a single table name or a list of names. qForTbl optionally
    maps a table name to the query used to read it (only honoured when
    api='pandas'). api: 'pandas' moves each table through a DataFrame;
    'psql' dumps and restores the whole table set; any other value falls
    back to 'pandas'.
    """
    from gasp.pyt import obj_to_lst

    # Invalid api values fall back to the pandas strategy
    api = api if api in ('pandas', 'psql') else 'pandas'

    tables = obj_to_lst(tables)

    if api == 'pandas':
        from gasp.sql.fm import q_to_obj

        for tbl in tables:
            # Use the caller-supplied query when one exists for this table
            if qForTbl and tbl in qForTbl:
                sql = qForTbl[tbl]
            else:
                sql = "SELECT * FROM {}".format(tbl)

            data = q_to_obj(from_db, sql, db_api='psql')

            df_to_db(to_db, data, tbl, api='psql')

    else:
        import os
        from gasp.pyt.oss import mkdir, del_folder
        from gasp.sql.fm import dump_tbls
        from gasp.sql.to import restore_tbls

        # Work inside a randomly-named temporary folder next to this module
        tmp = mkdir(
            os.path.dirname(os.path.abspath(__file__)), randName=True
        )

        # Dump the tables to a SQL script and restore it in the target DB
        script = dump_tbls(from_db, tables, os.path.join(
            tmp, "tables_data.sql"
        ))

        restore_tbls(to_db, script, tables)

        del_folder(tmp)
def split_table_by_col_distinct(db, tbl, col):
    """
    Create a new table for each distinct value in column col.

    Each new table is named <tbl>_<value> and holds the rows of tbl
    where col equals that value.
    """
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import cols_type
    from gasp.sql.to import q_to_ntbl

    fields_types = cols_type(db, tbl)

    # Get unique values
    VALUES = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=col, t=tbl
        ), db_api='psql'
    )[col].tolist()

    # Text columns need the value quoted in the WHERE clause
    whr = '{}=\'{}\'' if fields_types[col] == str else '{}={}'

    for row in VALUES:
        # BUGFIX: .tolist() yields scalar values, so use the value itself.
        # The previous row[0] took the first character of string values
        # and raised TypeError for numeric ones.
        q_to_ntbl(
            db, '{}_{}'.format(tbl, str(row)),
            "SELECT * FROM {} WHERE {}".format(
                tbl, whr.format(col, str(row))
            ), api='psql')
def thereIsRowsSameTimeInt(row):
    """
    Flag a row when its entity/day/hour group contains at least two
    records closer in time than TIME_TOLERANCE.

    Sets row['time_difference'] to 1 when such a pair exists, 0
    otherwise, and returns the (mutated) row.

    NOTE(review): relies on names from an enclosing scope (COLS,
    COLS_TYPE, db, table, hour_decimal_field, TIME_TOLERANCE, q_to_obj)
    — this def looks like a nested helper; the same code appears inside
    ID_rows_with_temporal_proximity_by_entities further down.
    """
    # Build a WHERE clause matching this row's values in every grouping
    # column; text columns are quoted, numeric ones are not
    whr = []
    for c in COLS:
        if COLS_TYPE[c] == str:
            whr.append("{}='{}'".format(c, row[c]))
        else:
            whr.append("{}={}".format(c, row[c]))

    # All decimal-hour values in this row's group
    hourRows = q_to_obj(db,
        "SELECT {} FROM {} WHERE {}".format(
            hour_decimal_field, table, " AND ".join(whr)
        ), db_api='psql'
    )[hour_decimal_field].tolist()

    # Pairwise comparison; stop at the first pair inside the tolerance.
    # NOTE(review): 'dif' is unbound (NameError) when hourRows has fewer
    # than two elements, and hourRows[i][0] assumes each item is
    # indexable — confirm what q_to_obj/.tolist() actually yields here.
    for i in range(len(hourRows)):
        for e in range(i + 1, len(hourRows)):
            dif = abs(hourRows[i][0] - hourRows[e][0])
            if dif < TIME_TOLERANCE:
                break
        if dif < TIME_TOLERANCE:
            break

    if dif < TIME_TOLERANCE:
        row['time_difference'] = 1
    else:
        row['time_difference'] = 0

    return row
def db_to_db(db_a, db_b, typeDBA, typeDBB):
    """
    Copy all tables in one database to another database.

    Useful when we want to migrate a SQLITE DB to a PostgreSQL DB.

    typeDBA / typeDBB options:
    * sqlite
    * psql
    """
    # IMPROVED: removed unused local 'import os'
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import lst_tbl
    from gasp.sql.db import create_db

    # List Tables in DB A (views are skipped)
    tbls = lst_tbl(db_a, excludeViews=True, api=typeDBA)

    # Create database B (no overwrite if it already exists)
    db_b = create_db(db_b, overwrite=False, api=typeDBB)

    # Copy every table to Database B
    for tbl in tbls:
        df = q_to_obj(
            db_a, "SELECT * FROM {}".format(tbl), db_api=typeDBA
        )

        df_to_db(db_b, df, tbl, append=None, api=typeDBB)
def bar_chart_by_query(db, query, xaxis_col, chart_data_fld_name, series, outxls):
    """
    Run a query once per serie and build a bar chart with the results.

    Each serie becomes one bar series in the chart, so the query statement
    must have a placeholder to receive each serie in series...

    E.g.: "SELECT fld_name FROM t WHERE series_fld={}".format(serie in series)
    """
    from gasp.sql.fm import q_to_obj
    from gasp.pyt.df.joins import combine_dfs
    from gasp.pyt.xls.charts import barchart_in_xls_from_df

    # One DataFrame per serie; the data column is renamed to the serie name
    frames = []
    for s in series:
        serie_df = q_to_obj(db, query.format(s), db_api='psql')
        serie_df.rename(columns={chart_data_fld_name: s}, inplace=True)
        frames.append(serie_df)

    # Join every serie's frame on the x-axis column
    joined = combine_dfs(frames[0], frames[1:], xaxis_col)

    outxls = barchart_in_xls_from_df(joined, outxls, series, xaxis_col)

    return outxls
def apndtbl_in_otherdb(db_a, db_b, tblA, tblB, mapCols, geomCol=None, srsEpsg=None):
    """
    Append data of one table to another table in another database.

    mapCols maps source column names (in tblA of db_a) to the names they
    should have in tblB of db_b. When geomCol and srsEpsg are given, the
    geometry column is read and written as geometry.
    """
    from gasp.sql.fm import q_to_obj

    # Read only the mapped columns from the SOURCE database.
    # BUGFIX: the non-geometry branch used to query db_b (the target)
    # instead of db_a (the source).
    if geomCol and srsEpsg:
        df = q_to_obj(db_a, "SELECT {} FROM {}".format(
            ", ".join(list(mapCols.keys())), tblA
        ), db_api='psql', geomCol=geomCol, epsg=srsEpsg)
    else:
        df = q_to_obj(db_a, "SELECT {} FROM {}".format(
            ", ".join(list(mapCols.keys())), tblA
        ), db_api='psql', geomCol=None, epsg=None)

    # Rename columns to their names in the target table
    df.rename(columns=mapCols, inplace=True)

    # The geometry column may have been renamed as well
    if geomCol:
        for k in mapCols:
            if geomCol == k:
                geomCol = mapCols[k]
                break

    # Send data to the other database
    if geomCol and srsEpsg:
        from gasp.gt.prop.feat import get_gtype

        # Get Geom Type of the data being appended
        gType = get_gtype(df, geomCol=geomCol, gisApi='pandas')

        df_to_db(
            db_b, df, tblB, append=True, api='psql', epsg=srsEpsg,
            geomType=gType, colGeom=geomCol
        )
    else:
        df_to_db(db_b, df, tblB, append=True, api='psql')

    return tblB
def tbl_geomtype(db, table, geomCol='geom'):
    """
    Return the number of distinct geometry types in a table.
    """
    from gasp.sql.fm import q_to_obj

    # Dump multi-part geometries and count the distinct geometry types
    q = (
        "SELECT COUNT(*) AS row_count FROM ("
        "SELECT ST_GeometryType((ST_Dump({})).geom) AS cnt_geom "
        "FROM {} GROUP BY ST_GeometryType((ST_Dump({})).geom)"
        ") AS foo"
    ).format(geomCol, table, geomCol)

    cnt = q_to_obj(db, q, db_api='psql')

    return int(cnt.iloc[0].row_count)
def split_table_entity_number(db, table, entity_field, entity_number):
    """
    Split a table into several tables using as reference a number of
    entities per table.

    If a table has 1 000 000 entities and entity_number is 250 000, this
    method will create four tables, each one with 250 000 entities.
    250 000 entities, not rows. Don't forget that the main table may have
    more than one row for the same entity.
    """
    import pandas
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import cols_type
    from gasp.sql.to import q_to_ntbl

    # Select entities in table
    entities = q_to_obj(db, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table
    ), db_api='psql')

    # Split entities into groups according to entity_number.
    # BUGFIX: the loop condition used to be 'lower <= len(...)', which
    # appended an empty trailing group (and later produced an invalid
    # query) whenever the entity count is a multiple of entity_number.
    entityGroup = []

    lower, high = 0, entity_number
    while lower < len(entities.index):
        if high > len(entities.index):
            high = len(entities.index)

        # .copy() so the WHERE-clause rewrite below works on real
        # DataFrames, not views of 'entities'
        entityGroup.append(entities.iloc[lower:high].copy())

        lower += entity_number
        high += entity_number

    # For each dataframe, create a new table
    COLS_TYPE = cols_type(db, table)

    c = 0
    for df in entityGroup:
        # Turn each entity value into a 'field=value' predicate;
        # text values are quoted, numeric values are not
        if COLS_TYPE[entity_field] != str:
            df[entity_field] = '{}='.format(entity_field) + \
                df[entity_field].astype(str)
        else:
            df[entity_field] = '{}=\''.format(entity_field) + \
                df[entity_field].astype(str) + '\''

        whr = ' OR '.join(df[entity_field])

        q_to_ntbl(db, '{}_{}'.format(table, str(c)), (
            "SELECT * FROM {} WHERE {}"
        ).format(table, whr), api='psql')

        c += 1
def build12_torst(buildTbl):
    """
    Export every LULC class in a buildings table to shapefile and
    rasterize it.

    For each distinct 'cls' in buildTbl: select its features to
    nshp_build_<cls>.shp and convert it to nrst_build_<cls>.tif,
    registering the resulting raster and the elapsed time of each step.

    NOTE(review): relies on names from an enclosing scope (osmdb, apidb,
    folder, cells, srscode, rstT, resLyr, timeGasto, dt, os, q_to_obj,
    sel_by_attr, shp_to_rst) — this def appears to be a nested helper.
    """
    # Distinct LULC classes present in the buildings table
    LulcCls = q_to_obj(
        osmdb, "SELECT cls FROM {} GROUP BY cls".format(buildTbl),
        db_api='psql' if apidb == 'POSTGIS' else 'sqlite').cls.tolist()

    for lulc_cls in LulcCls:
        time_m = dt.datetime.now().replace(microsecond=0)

        # To SHP — export API depends on the database backend
        if apidb == 'SQLITE':
            shpB = sel_by_attr(
                osmdb,
                "SELECT * FROM {} WHERE cls={}".format(
                    buildTbl, str(lulc_cls)),
                os.path.join(folder, 'nshp_build_{}.shp'.format(lulc_cls)),
                api_gis='ogr')
        else:
            shpB = sel_by_attr(
                osmdb,
                "SELECT * FROM {} WHERE cls={}".format(
                    buildTbl, str(lulc_cls)),
                "geometry",
                os.path.join(folder, 'nshp_build_{}.shp'.format(lulc_cls)),
                api='pgsql2shp', tableIsQuery=True)

        time_n = dt.datetime.now().replace(microsecond=0)

        # To RST — burn the class code into a GeoTIFF
        brst = shp_to_rst(
            shpB, None, cells, 0,
            os.path.join(folder, 'nrst_build_{}.tif'.format(lulc_cls)),
            srscode, rstT, api='gdal')

        time_o = dt.datetime.now().replace(microsecond=0)

        resLyr[int(lulc_cls)] = [brst]

        # NOTE(review): keys int(cls) and int(cls)+1 collide for
        # consecutive class codes — confirm this overwrite is acceptable
        timeGasto[int(lulc_cls)] = (
            'to_shp_{}'.format(str(lulc_cls)), time_n - time_m)
        timeGasto[int(lulc_cls) + 1] = ('to_rst_n_{}'.format(
            str(lulc_cls)), time_o - time_n)
def count_entity_periods_with_certain_duration(db, PERIOD_INTERVAL, PGTABLE, TIME_FIELD, ENTITY_FIELD, OUT_TABLE, filterWhere=None):
    """
    Count rows in a pgtable for a given period of X minutes for each
    interest entity.

    PERIOD_INTERVAL = "01:00:00"

    One count column is produced per interval (named
    s<start>_e<end> with ':' replaced by '_'); the per-interval frames
    are joined on ENTITY_FIELD and written to OUT_TABLE.

    NOTE(review): q_to_obj and obj_to_tbl are not imported locally —
    presumably module-level imports; confirm at file top.
    """
    import pandas
    from gasp.pyt.tm import day_to_intervals2
    from gasp.pyt.df.joins import combine_dfs

    # Get Intervals covering the whole day
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)

    # For each interval/period, count the number of rows by entity
    counting = []
    for _int in INTERVALS:
        # TIME_FIELD is compared as 'HH24:MI:SS' timestamps against the
        # interval bounds; filterWhere is ANDed in when given
        Q = (
            "SELECT {entityCol}, COUNT({entityCol}) AS {cntCol} "
            "FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr} "
            "GROUP BY {entityCol}"
        ).format(
            cntCol="s{}_e{}".format(
                _int[0][:5], _int[1][:5]).replace(":", "_"),
            table=PGTABLE, timeCol=TIME_FIELD, entityCol=ENTITY_FIELD,
            minLower=_int[0], minUpper=_int[1],
            whr="" if not filterWhere else " AND ({}) ".format(filterWhere)
        )

        count = q_to_obj(db, Q, db_api='psql')

        counting.append(count)

    # Join every interval's counts on the entity column
    mainDf = combine_dfs(counting[0], counting[1:], ENTITY_FIELD)

    obj_to_tbl(mainDf, OUT_TABLE)

    return OUT_TABLE
def distinct_val(db, pgtable, column):
    """
    Get distinct values in one or more columns of a table.

    Returns a list of dicts, one per distinct value combination.
    """
    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj

    cols = ", ".join(obj_to_lst(column))

    q = "SELECT {col} FROM {t} GROUP BY {col};".format(
        col=cols, t=pgtable
    )

    return q_to_obj(db, q, db_api='psql').to_dict(orient="records")
def check_last_id(db, pk, table):
    """
    Return the maximum value of column pk in table.

    Returns 0 if there is no data (or the maximum is NULL/0).
    """
    from gasp.sql.fm import q_to_obj

    q = "SELECT MAX({}) AS fid FROM {}".format(pk, table)
    d = q_to_obj(db, q, db_api='psql').fid.tolist()

    # BUGFIX: an empty table yields NaN (not None) through pandas and
    # 'not NaN' is False, so NaN used to be returned instead of 0.
    # 'd[0] != d[0]' is only True for NaN.
    if not d[0] or d[0] != d[0]:
        return 0
    else:
        return d[0]
def grs_rst(db, polyTbl, api='SQLITE'):
    """
    Simple selection, convert result to Raster.

    For each non-NULL 'selection' class in polyTbl: import the matching
    features into GRASS and rasterize them with the class code as value.
    Returns ({class: raster_name}, {step_key: (label, duration)}).

    NOTE(review): outShpIsGRASS=True / api='grass' imply a GRASS session
    was started by the caller — confirm.
    """
    import datetime
    from gasp.sql.fm import q_to_obj
    from gasp.gt.toshp.db import dbtbl_to_shp as db_to_grs
    from gasp.gt.torst import shp_to_rst

    # Get Classes with a non-NULL selection value
    time_a = datetime.datetime.now().replace(microsecond=0)
    lulcCls = q_to_obj(db, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(polyTbl),
        db_api='psql' if api == 'POSTGIS' else 'sqlite'
    ).selection.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)

    timeGasto = {0 : ('check_cls', time_b - time_a)}

    # Import data into GRASS and convert it to raster
    clsRst = {}
    tk = 1
    for cls in lulcCls:
        time_x = datetime.datetime.now().replace(microsecond=0)
        # Import only this class's polygons as a GRASS vector
        grsVect = db_to_grs(
            db, polyTbl, "geometry", "rule1_{}".format(str(cls)),
            inDB='psql' if api == 'POSTGIS' else 'sqlite',
            where="selection = {}".format(str(cls)),
            notTable=True, filterByReg=True, outShpIsGRASS=True
        )
        time_y = datetime.datetime.now().replace(microsecond=0)

        # Rasterize, burning the class code as the cell value
        grsRst = shp_to_rst(
            grsVect, int(cls), None, None,
            "rst_rule1_{}".format(str(cls)), api='grass'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)

        clsRst[int(cls)] = grsRst
        timeGasto[tk] = ('import_{}'.format(cls), time_y - time_x)
        timeGasto[tk+1] = ('torst_{}'.format(cls), time_z - time_y)

        tk += 2

    return clsRst, timeGasto
def count_by_periods_with_certain_duration(db, PERIOD_INTERVAL, pgtable, TIME_FIELD, outTable, filterWhere=None):
    """
    Count rows in a pgtable by periods of X minutes.

    PERIOD_INTERVAL = "01:00:00"

    The result has one row per interval, indexed "<start>-<end>", and is
    written to outTable.

    NOTE(review): q_to_obj and obj_to_tbl are not imported locally —
    presumably module-level imports; confirm at file top.
    """
    import pandas
    from gasp.pyt.tm import day_to_intervals2

    # Get Intervals covering the whole day
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)

    # For each interval/period, count the number of rows
    counting = None
    for _int_ in INTERVALS:
        QUERY = (
            "SELECT COUNT(*) AS count FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr}"
        ).format(
            table=pgtable, timeCol=TIME_FIELD,
            minLower=_int_[0], minUpper=_int_[1],
            whr="" if not filterWhere else " AND ({})".format(filterWhere)
        )

        count = q_to_obj(db, QUERY, db_api='psql')

        # Index the single-row frame by the interval label
        count.rename(index={
            0: "{}-{}".format(_int_[0][:5], _int_[1][:5])
        }, inplace=True)

        # IMPROVED: identity test against None instead of comparing
        # types, and pandas.concat instead of DataFrame.append (removed
        # in pandas 2.0). Row order and indexes are unchanged.
        if counting is None:
            counting = count.copy()
        else:
            counting = pandas.concat([counting, count])

    obj_to_tbl(counting, outTable)

    return outTable
def show_duplicates_in_xls(db_name, table, pkCols, outFile, tableIsQuery=None):
    """
    Find duplicated rows and write these objects in a table (outFile).

    db_name - database with the data
    table   - table name, or a query when tableIsQuery is truthy
    pkCols  - column(s) whose combination should be unique
    outFile - output written by obj_to_tbl

    Raises ValueError when pkCols is empty/invalid.
    """
    import pandas
    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj
    from gasp.to import obj_to_tbl

    pkCols = obj_to_lst(pkCols)

    if not pkCols:
        raise ValueError("pkCols value is not valid")

    if not tableIsQuery:
        # Join the table with the pkCols combinations occurring
        # more than once
        q = (
            "SELECT {t}.* FROM {t} INNER JOIN ("
            "SELECT {cls}, COUNT({cnt}) AS conta FROM {t} "
            "GROUP BY {cls}"
            ") AS foo ON {rel} "
            "WHERE conta > 1"
        ).format(
            t=table, cls=", ".join(pkCols), cnt=pkCols[0],
            rel=" AND ".join([
                "{t}.{c} = foo.{c}".format(t=table, c=col)
                for col in pkCols
            ])
        )
    else:
        # Same logic, but 'table' is itself a query, so it is wrapped
        # in sub-selects
        q = (
            "SELECT foo.* FROM ({q_}) AS foo INNER JOIN ("
            "SELECT {cls}, COUNT({cnt}) AS conta "
            "FROM ({q_}) AS foo2 GROUP BY {cls}"
            ") AS jt ON {rel} "
            "WHERE conta > 1"
        ).format(
            q_=table, cls=", ".join(pkCols), cnt=pkCols[0],
            rel=" AND ".join([
                "foo.{c} = jt.{c}".format(c=x) for x in pkCols
            ])
        )

    data = q_to_obj(db_name, q, db_api='psql')

    obj_to_tbl(data, outFile)

    return outFile
def row_num(db, table, where=None, api='psql'):
    """
    Return the number of rows in a table or query.

    API's Available:
    * psql;
    * sqlite;
    """
    from gasp.sql.fm import q_to_obj

    # 'table' may itself be a query — wrap it in a sub-select then
    if table.startswith('SELECT '):
        Q = "SELECT COUNT(*) AS nrows FROM ({}) AS foo".format(table)
    else:
        whr = "" if not where else " WHERE {}".format(where)
        Q = "SELECT COUNT(*) AS nrows FROM {}{}".format(table, whr)

    d = q_to_obj(db, Q, db_api=api)

    return int(d.iloc[0].nrows)
def cols_name(dbname, table, sanitizeSpecialWords=True, api='psql'):
    """
    Return the column names of a table in one database.

    api options: psql, sqlite, mysql. For psql, names clashing with
    PostgreSQL reserved words are double-quoted when
    sanitizeSpecialWords is truthy.
    """
    if api == 'psql':
        c = sqlcon(dbname, sqlAPI='psql')

        cursor = c.cursor()
        cursor.execute("SELECT * FROM {} LIMIT 1;".format(table))

        colnames = [desc[0] for desc in cursor.description]

        if sanitizeSpecialWords:
            from gasp.cons.psql import PG_SPECIAL_WORDS

            # Quote names that clash with reserved words
            colnames = [
                '"{}"'.format(n) if n in PG_SPECIAL_WORDS else n
                for n in colnames
            ]

    elif api == 'sqlite':
        import sqlite3

        con = sqlite3.connect(dbname)

        cursor = con.execute("SELECT * FROM {} LIMIT 1".format(table))

        colnames = [d[0] for d in cursor.description]

    elif api == 'mysql':
        from gasp.sql.fm import q_to_obj

        data = q_to_obj(
            dbname, "SELECT * FROM {} LIMIT 1".format(table),
            db_api='mysql')

        colnames = data.columns.values

    else:
        raise ValueError('API {} is not available'.format(api))

    return colnames
def lst_views(db, schema='public', basename=None):
    """
    List views in a database.

    basename optionally restricts the result to views whose name
    contains any of the given substrings.
    """
    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj

    names = obj_to_lst(basename)

    # Optional LIKE filter on the view name
    if not names:
        flt = ""
    else:
        like = " OR ".join(
            "{} LIKE '%%{}%%'".format("table_name", b) for b in names
        )
        flt = " AND ({})".format(like)

    q = (
        "SELECT table_name FROM information_schema.views "
        "WHERE table_schema='{}'{}"
    ).format(schema, flt)

    return q_to_obj(db, q, db_api='psql').table_name.tolist()
def tbl_ext(db, table, geomCol):
    """
    Return the extent of the geometries in one table as
    [xmin, ymin, xmax, ymax].
    """
    from gasp.sql.fm import q_to_obj

    # Dump every vertex and take the min/max of its coordinates
    q = (
        "SELECT MIN(ST_X(pnt_geom)) AS eleft, MAX(ST_X(pnt_geom)) AS eright, "
        "MIN(ST_Y(pnt_geom)) AS bottom, MAX(ST_Y(pnt_geom)) AS top "
        "FROM ("
        "SELECT (ST_DumpPoints({geomcol})).geom AS pnt_geom "
        "FROM {tbl}"
        ") AS foo"
    ).format(tbl=table, geomcol=geomCol)

    ext_df = q_to_obj(db, q, db_api='psql')
    ext = ext_df.to_dict(orient='index')[0]

    return [ext[k] for k in ('eleft', 'bottom', 'eright', 'top')]
def get_osm_feat_by_rule(nomenclature):
    """
    Return OSM Features By rule.

    Queries the procedure SQLite DB for every OSM feature (key/value
    pair) related to the given nomenclature, joined with the rule it
    belongs to and the rule's buffer/area parameters.

    NOTE(review): DB_SCHEMA and PROCEDURE_DB are expected to be
    module-level constants — confirm they are defined at file top.
    """
    from gasp.sql.fm import q_to_obj

    # jtbl joins the nomenclature relation table with the rules table,
    # resolving rule id/name plus buffer/area values per OSM feature
    Q = (
        "SELECT jtbl.{rellulccls} AS {rellulccls}, "
        "{osmfeat}.{key} AS {key}, {osmfeat}.{val} AS {val}, "
        "jtbl.{ruleName} AS {ruleName}, jtbl.{bufferCol} "
        "AS {bufferCol}, jtbl.{areaCol} AS {areaCol} "
        "FROM {osmfeat} INNER JOIN ("
        "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
        "{rules}.{ruleID}, {rules}.{ruleName}, "
        "{osmrel}.{bufferCol}, {osmrel}.{areaCol} "
        "FROM {osmrel} "
        "INNER JOIN {rules} ON {osmrel}.{_ruleID} = {rules}.{ruleID} "
        ") AS jtbl ON {osmfeat}.{osmid} = jtbl.{relosmid}"
    ).format(
        osmfeat = DB_SCHEMA["OSM_FEATURES"]["NAME"],
        osmid = DB_SCHEMA["OSM_FEATURES"]["OSM_ID"],
        key = DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        val = DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"],
        osmrel = DB_SCHEMA[nomenclature]["OSM_RELATION"],
        relosmid = DB_SCHEMA[nomenclature]["OSM_FK"],
        rellulccls = DB_SCHEMA[nomenclature]["CLS_FK"],
        _ruleID = DB_SCHEMA[nomenclature]["RULE_FK"],
        rules = DB_SCHEMA["RULES"]["NAME"],
        ruleID = DB_SCHEMA["RULES"]["RULE_ID"],
        ruleName = DB_SCHEMA["RULES"]["RULE_NAME"],
        bufferCol = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["BUFFER"],
        areaCol = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["AREA"]
    )

    return q_to_obj(PROCEDURE_DB, Q, db_api='sqlite')
def get_not_used_tags(OSM_FILE, OUT_TBL):
    """
    Use a file OSM to detect tags not considered in the OSM2LULC
    procedure.

    Exports the unused tags' features to shapefiles (split in chunks of
    500 tags) and writes the list of unused tags to OUT_TBL.
    """
    import os
    from gasp.to import obj_to_tbl
    from gasp.gt.attr import sel_by_attr
    from gasp.sql.fm import q_to_obj
    from gasp.pyt.df.split import df_split
    from gasp.pyt.oss import fprop
    from gasp.gt.toshp.osm import osm_to_gpkg

    OSM_TAG_MAP = {
        "DB": os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'osmtolulc.sqlite'),
        "OSM_FEAT": "osm_features",
        "KEY_COL": "key",
        "VALUE_COL": "value",
        "GEOM_COL": "geom"
    }

    WORKSPACE = os.path.dirname(OUT_TBL)

    sqdb = osm_to_gpkg(OSM_FILE, os.path.join(
        WORKSPACE, fprop(OSM_FILE, 'fn') + '.gpkg'))

    # Get Features we are considering in OSM2LULC
    ourOSMFeatures = q_to_obj(OSM_TAG_MAP["DB"], (
        "SELECT {key} AS key_y, {value} AS value_y, {geom} AS geom_y "
        "FROM {tbl}"
    ).format(
        key=OSM_TAG_MAP["KEY_COL"], value=OSM_TAG_MAP["VALUE_COL"],
        geom=OSM_TAG_MAP["GEOM_COL"], tbl=OSM_TAG_MAP["OSM_FEAT"]
    ), db_api='sqlite')

    # Get Features in File — tag keys inspected per geometry table
    TABLES_TAGS = {
        'points': ['highway', 'man_made', 'building'],
        'lines': [
            'highway', 'waterway', 'aerialway', 'barrier',
            'man_made', 'railway'
        ],
        'multipolygons': [
            'aeroway', 'amenity', 'barrier', 'building', 'craft',
            'historic', 'land_area', 'landuse', 'leisure', 'man_made',
            'military', 'natural', 'office', 'place', 'shop',
            'sport', 'tourism', 'waterway', 'power', 'railway',
            'healthcare', 'highway'
        ]
    }

    Qs = [
        " UNION ALL ".join([(
            "SELECT '{keycol}' AS key, {keycol} AS value, "
            "'{geomtype}' AS geom FROM {tbl} WHERE "
            "{keycol} IS NOT NULL"
        ).format(
            keycol=c,
            geomtype='Point' if table == 'points' else 'Line' \
                if table == 'lines' else 'Polygon',
            tbl=table
        ) for c in TABLES_TAGS[table]]) for table in TABLES_TAGS
    ]

    # Distinct key/value/geom combinations present in the file
    fileOSMFeatures = q_to_obj(sqdb, (
        "SELECT key, value, geom FROM ({}) AS foo "
        "GROUP BY key, value, geom"
    ).format(" UNION ALL ".join(Qs)), db_api='sqlite')

    _fileOSMFeatures = fileOSMFeatures.merge(
        ourOSMFeatures, how='outer',
        left_on=["key", "value", "geom"],
        right_on=["key_y", "value_y", "geom_y"]
    )

    # Select OSM Features of file without correspondence
    _fileOSMFeatures["isnew"] = _fileOSMFeatures.key_y.fillna(value='nenhum')

    newTags = _fileOSMFeatures[_fileOSMFeatures.isnew == 'nenhum']

    # Escape single quotes for SQL and build one predicate per tag
    newTags["value"] = newTags.value.str.replace("'", "''")

    newTags["whr"] = newTags.key + "='" + newTags.value + "'"

    # Export tags not being used to new shapefiles
    def to_regular_str(row):
        # Fallback sanitizer used when the first export attempt fails.
        # NOTE(review): currently copies whr unchanged into whr_san,
        # and whr_san is never read — confirm intended behavior.
        san_str = row.whr
        row["whr_san"] = san_str
        return row

    for t in TABLES_TAGS:
        if t == 'points':
            filterDf = newTags[newTags.geom == 'Point']
        elif t == 'lines':
            filterDf = newTags[newTags.geom == 'Line']
        elif t == 'multipolygons':
            filterDf = newTags[newTags.geom == 'Polygon']

        # Export at most 500 tags per shapefile
        if filterDf.shape[0] > 500:
            dfs = df_split(filterDf, 500, nrows=True)
        else:
            dfs = [filterDf]

        # BUGFIX: Q is kept as a template here. It used to be formatted
        # with the whole filterDf WHERE clause at this point, making the
        # per-chunk .format() calls below no-ops, so every chunk
        # exported the full selection instead of its own 500-tag slice.
        Q = "SELECT * FROM {} WHERE {}"

        i = 1
        for df in dfs:
            fn = t + '.shp' if len(dfs) == 1 else \
                '{}_{}.shp'.format(t, str(i))

            try:
                shp = sel_by_attr(
                    sqdb, Q.format(t, df.whr.str.cat(sep=" OR ")),
                    os.path.join(WORKSPACE, fn), api_gis='ogr'
                )
            except:
                # Best-effort retry with sanitized WHERE strings
                __df = df.apply(lambda x: to_regular_str(x), axis=1)

                shp = sel_by_attr(
                    sqdb, Q.format(t, __df.whr.str.cat(sep=" OR ")),
                    os.path.join(WORKSPACE, fn)
                )

            i += 1

    # Export OUT_TBL with the tags not being used
    newTags.drop(
        ['key_y', 'value_y', 'geom_y', 'isnew', 'whr'],
        axis=1, inplace=True
    )
    obj_to_tbl(newTags, OUT_TBL, sheetsName="new_tags", sanitizeUtf8=True)

    return OUT_TBL
def basic_buffer(osmdb, lineTable, dataFolder, apidb='SQLITE'):
    """
    Data from Lines table to Polygons using a basic buffering strategy.

    For each non-NULL 'basic_buffer' class in lineTable: buffer the
    lines into a shapefile, import it into GRASS and rasterize it with
    the class code as value. Returns
    ({class: raster_name}, {step_key: (label, duration)}).

    NOTE(review): uses os.path.join but 'os' is not imported in this
    function — relies on a module-level import; confirm at file top.
    """
    import datetime
    from gasp.sql.fm import q_to_obj
    # Buffer implementation depends on the database backend
    if apidb == 'POSTGIS':
        from gasp.gql.prox import st_buffer
    else:
        from gasp.gql.prox import splite_buffer as st_buffer
    from gasp.gt.torst import shp_to_rst
    from gasp.gt.toshp.cff import shp_to_grs

    # Get classes with a non-NULL basic_buffer value
    time_a = datetime.datetime.now().replace(microsecond=0)
    lulcCls = q_to_obj(
        osmdb,
        ("SELECT basic_buffer FROM {} WHERE basic_buffer IS NOT NULL "
         "GROUP BY basic_buffer").format(lineTable),
        db_api='psql' if apidb == 'POSTGIS' else 'sqlite'
    ).basic_buffer.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)

    timeGasto = {0: ('check_cls', time_b - time_a)}

    clsRst = {}
    tk = 1
    for cls in lulcCls:
        # Run BUFFER Tool, dissolving all features of this class
        time_x = datetime.datetime.now().replace(microsecond=0)
        bb_file = st_buffer(
            osmdb, lineTable, "bf_basic_buffer", "geometry",
            os.path.join(
                dataFolder, 'bb_rule5_{}.shp'.format(str(int(cls)))),
            whrClause="basic_buffer={}".format(str(int(cls))),
            outTblIsFile=True, dissolve="ALL",
            cols_select="basic_buffer")
        time_y = datetime.datetime.now().replace(microsecond=0)

        # Data TO GRASS
        grsVect = shp_to_grs(
            bb_file, "bb_{}".format(int(cls)), asCMD=True,
            filterByReg=True)
        time_z = datetime.datetime.now().replace(microsecond=0)

        # Data to Raster, burning the class code as cell value
        rstVect = shp_to_rst(
            grsVect, int(cls), None, None,
            "rbb_{}".format(int(cls)), api="grass")
        time_w = datetime.datetime.now().replace(microsecond=0)

        clsRst[int(cls)] = rstVect

        timeGasto[tk] = ('do_buffer_{}'.format(cls), time_y - time_x)
        timeGasto[tk + 1] = ('import_{}'.format(cls), time_z - time_y)
        timeGasto[tk + 2] = ('torst_{}'.format(cls), time_w - time_z)
        tk += 3

    return clsRst, timeGasto
def num_base_buffer(osmdb, lineTbl, folder, cells, srscode, rtemplate, api='SQLITE'):
    """
    Data from Lines to Polygons (buffer + rasterize), one thread per
    LULC class.

    For each non-NULL 'basic_buffer' class in lineTbl: buffer the lines
    into a shapefile and rasterize it with GDAL. Returns
    ({class: raster_path}, {step_key: (label, duration)}).

    NOTE(review): uses os.path.join but 'os' is not imported in this
    function — relies on a module-level import; confirm at file top.
    Worker threads write to the shared clsRst/timeGasto dicts; the
    timing keys (cnt+1 / cnt+2 with cnt = (i+1)*10) keep them disjoint
    per thread.
    """
    import datetime
    from threading import Thread
    from gasp.sql.fm import q_to_obj
    # Buffer implementation depends on the database backend
    if api == 'SQLITE':
        from gasp.gql.prox import splite_buffer as st_buffer
    else:
        from gasp.gql.prox import st_buffer
    from gasp.gt.torst import shp_to_rst

    # Get LULC Classes to be selected
    time_a = datetime.datetime.now().replace(microsecond=0)
    lulcCls = q_to_obj(
        osmdb,
        ("SELECT basic_buffer FROM {} WHERE basic_buffer IS NOT NULL "
         "GROUP BY basic_buffer").format(lineTbl),
        db_api='psql' if api == 'POSTGIS' else 'sqlite'
    ).basic_buffer.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)

    timeGasto = {0: ('check_cls', time_b - time_a)}

    clsRst = {}

    def exportAndBufferB(CLS, cnt):
        # Run BUFFER Tool for one class (no dissolve)
        time_x = datetime.datetime.now().replace(microsecond=0)
        bb_file = st_buffer(
            osmdb, lineTbl, "bf_basic_buffer", "geometry",
            os.path.join(
                folder, 'bb_rule5_{}.shp'.format(str(int(CLS)))),
            whrClause="basic_buffer={}".format(str(int(CLS))),
            outTblIsFile=True, dissolve=None,
            cols_select="basic_buffer")
        time_y = datetime.datetime.now().replace(microsecond=0)

        # To raster via GDAL, using the caller-provided template grid
        rstCls = shp_to_rst(
            bb_file, None, cells, 0,
            os.path.join(folder, 'rst_bbfr_{}.tif'.format(CLS)),
            epsg=srscode, rst_template=rtemplate, api='gdal')
        time_z = datetime.datetime.now().replace(microsecond=0)

        clsRst[CLS] = rstCls
        timeGasto[cnt + 1] = ('buffer_{}'.format(str(CLS)), time_y - time_x)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), time_z - time_y)

    # One worker thread per class
    thrds = [
        Thread(
            name="r5-{}".format(lulcCls[i]), target=exportAndBufferB,
            args=(lulcCls[i], (i + 1) * 10)
        ) for i in range(len(lulcCls))
    ]

    for t in thrds:
        t.start()
    for t in thrds:
        t.join()

    return clsRst, timeGasto
def num_selection(osmdb, polyTbl, folder, cellsize, srscode, rstTemplate, api='SQLITE'):
    """
    Select and Convert to Raster, one thread per LULC class.

    For each non-NULL 'selection' class in polyTbl: export the matching
    features to a shapefile and rasterize it with GDAL. Returns
    ({class: raster_path}, {step_key: (label, duration)}).

    Worker threads write to the shared clsRst/timeGasto dicts; the
    timing keys (cnt+1 / cnt+2 with cnt = (i+1)*10) keep them disjoint
    per thread.
    """
    import datetime; import os
    from threading import Thread
    # Export implementation depends on the database backend
    if api == 'SQLITE':
        from gasp.gt.attr import sel_by_attr
    else:
        from gasp.gt.toshp.db import dbtbl_to_shp as sel_by_attr
    from gasp.sql.fm import q_to_obj
    from gasp.gt.torst import shp_to_rst

    # Get classes in data
    time_a = datetime.datetime.now().replace(microsecond=0)
    classes = q_to_obj(osmdb, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(
        polyTbl
    ), db_api='psql' if api == 'POSTGIS' else 'sqlite').selection.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)

    timeGasto = {0 : ('check_cls', time_b - time_a)}

    # Select and Export
    clsRst = {}
    SQL_Q = "SELECT {lc} AS cls, geometry FROM {tbl} WHERE selection={lc}"

    def FilterAndExport(CLS, cnt):
        # Export one class to shapefile, then rasterize it
        time_x = datetime.datetime.now().replace(microsecond=0)
        if api == 'SQLITE':
            shp = sel_by_attr(
                osmdb, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api_gis='ogr'
            )
        else:
            shp = sel_by_attr(
                osmdb, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                "geometry",
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api='pgsql2shp', tableIsQuery=True
            )
        time_y = datetime.datetime.now().replace(microsecond=0)

        # Rasterize on the caller-provided template grid
        rstCls = shp_to_rst(
            shp, None, cellsize, 0,
            os.path.join(folder, 'sel_{}.tif'.format(str(CLS))),
            epsg=srscode, rst_template=rstTemplate, api='gdal'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)

        clsRst[int(CLS)] = rstCls
        timeGasto[cnt + 1] = ('toshp_{}'.format(str(CLS)), time_y - time_x)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), time_z - time_y)

    # One worker thread per class
    trs = []
    for i in range(len(classes)):
        trs.append(Thread(
            name="lll{}".format(str(classes[i])),
            target=FilterAndExport, args=(classes[i], (i+1) * 10)
        ))

    for t in trs:
        t.start()
    for t in trs:
        t.join()

    return clsRst, timeGasto
def run_query_for_values_in_col(db, query, table_interest_col, interest_col, outworkspace):
    """
    Execute a query for each distinct value in one column.

    In each iteration, the value participates in the query ({} in the
    query statement is replaced by the value, quoted for text columns).
    Each result is exported to an Excel file in outworkspace named
    <table_interest_col>_<value>.xlsx.

    Example, for a query as:
    SELECT ID_PERCURSO, PARAGEM, GEOM, DIA, COUNT(PARAGEM) AS conta
    FROM table WHERE DIA={} GROUP BY PARAGEM, GEOM, DIA;

    one table is generated per distinct DIA value, each holding only
    that day's aggregated rows.
    """
    import os
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import cols_type
    from gasp.to import obj_to_tbl

    fields_types = cols_type(db, table_interest_col)

    # Get unique values
    VALUES = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=interest_col, t=table_interest_col
        ), db_api='psql'
    )[interest_col].tolist()

    # Apply query for every value in VALUES; write data in excel.
    # BUGFIX: .tolist() yields scalar values, so use the value directly.
    # The previous value[0] took the first character of string values
    # and raised TypeError for numeric ones.
    for value in VALUES:
        data = q_to_obj(db, query.format(
            str(value) if fields_types[interest_col] != str else \
                "'{}'".format(str(value))
        ), db_api='psql')

        obj_to_tbl(data, os.path.join(outworkspace, '{}_{}.xlsx'.format(
            table_interest_col, str(value)
        )))
def rst_pnt_to_build(osmdb, pntTable, polyTable, api_db='SQLITE'):
    """
    Replace buildings with tag yes using the info in the Points Layer.

    Only used for URBAN ATLAS and CORINE LAND COVER.

    Building polygons that contain a tagged point take the point's class
    (new_buildings); polygons with no point keep class 11 (yes_builds).
    Both sets are imported into GRASS and rasterized per class. Returns
    ({class: [raster, ...]}, {step_key: (label, duration)}).

    NOTE(review): outShpIsGRASS=True / api="grass" imply a GRASS session
    was started by the caller — confirm.
    """
    import datetime as dt
    from gasp.sql.i import row_num as cnt_row
    from gasp.sql.fm import q_to_obj
    from gasp.gt.toshp.db import dbtbl_to_shp as db_to_shp
    from gasp.gql.ovly import feat_within, feat_not_within
    from gasp.gt.torst import shp_to_rst

    # Points inside building polygons give those polygons their class
    time_a = dt.datetime.now().replace(microsecond=0)
    new_build = feat_within(
        osmdb,
        ("(SELECT buildings AS pnt_build, geometry AS pnt_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(pntTable),
        "pnt_geom",
        ("(SELECT buildings AS poly_build, geometry AS poly_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(polyTable),
        "poly_geom", "new_buildings",
        inTblCols="pnt_build AS cls", withinCols="poly_geom AS geometry",
        outTblIsFile=None,
        apiToUse="OGR_SPATIALITE" if api_db != "POSTGIS" else api_db,
        geom_col="geometry")
    time_b = dt.datetime.now().replace(microsecond=0)

    # Building polygons without any tagged point keep class 11
    yes_build = feat_not_within(
        osmdb,
        ("(SELECT buildings AS poly_build, geometry AS poly_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(polyTable),
        "poly_geom",
        ("(SELECT buildings AS pnt_build, geometry AS pnt_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(pntTable),
        "pnt_geom", "yes_builds",
        inTblCols="poly_geom AS geometry, 11 AS cls",
        outTblIsFile=None,
        apiToUse="OGR_SPATIALITE" if api_db != "POSTGIS" else api_db,
        geom_col="geometry")
    time_c = dt.datetime.now().replace(microsecond=0)

    resLayers = {}

    # Only process class-11 buildings when some exist
    N11 = cnt_row(
        osmdb, yes_build,
        api='psql' if api_db == 'POSTGIS' else 'sqlite')
    time_d = dt.datetime.now().replace(microsecond=0)

    if N11:
        # Data to GRASS GIS
        grsBuild11 = db_to_shp(
            osmdb, yes_build, "geometry", "yes_builds", notTable=True,
            filterByReg=True,
            inDB='psql' if api_db == 'POSTGIS' else 'sqlite',
            outShpIsGRASS=True)
        time_f = dt.datetime.now().replace(microsecond=0)

        # To raster, burning class 11
        rstBuild11 = shp_to_rst(
            grsBuild11, 11, None, None, "rst_builds11",
            api="grass")
        time_g = dt.datetime.now().replace(microsecond=0)

        resLayers[11] = [rstBuild11]
    else:
        time_f = None
        time_g = None

    # Add data into GRASS GIS — classes found in the re-tagged polygons
    lulcCls = q_to_obj(
        osmdb, "SELECT cls FROM {} GROUP BY cls".format(new_build),
        db_api='psql' if api_db == 'POSTGIS' else 'sqlite').cls.tolist()

    timeGasto = {
        0: ('intersect', time_b - time_a),
        1: ('disjoint', time_c - time_b),
        2: ('count_b11', time_d - time_c),
        3: None if not time_f else ('import_b11', time_f - time_d),
        4: None if not time_g else ('torst_b11', time_g - time_f),
    }
    tk = 5
    for cls in lulcCls:
        time_x = dt.datetime.now().replace(microsecond=0)
        # Import this class's polygons into GRASS
        shp = db_to_shp(
            osmdb, new_build, "geometry", "nbuild_{}".format(str(cls)),
            "cls = {}".format(cls), notTable=True, filterByReg=True,
            outShpIsGRASS=True)
        time_y = dt.datetime.now().replace(microsecond=0)

        # Rasterize with the class code as value
        rstb = shp_to_rst(
            shp, int(cls), None, None,
            "rst_nbuild_{}".format(str(cls)), api="grass")
        time_z = dt.datetime.now().replace(microsecond=0)

        # Class 11 may already hold the yes_builds raster — append
        if int(cls) == 11 and int(cls) in resLayers:
            resLayers[int(cls)].append(rstb)
        else:
            resLayers[int(cls)] = [rstb]

        timeGasto[tk] = ('import_bn{}'.format(cls), time_y - time_x)
        timeGasto[tk + 1] = ('torst_bn{}'.format(cls), time_z - time_y)

        tk += 2

    return resLayers, timeGasto
def tbl_to_area_mtx(db, tbl, cola, colb, result):
    """
    Table to (confusion/area) Matrix.

    Table as:
        FID | col_a | col_b | geom
          1 |   A   |   A   | ....
          2 |   A   |   B   | ....
          3 |   B   |   A   | ....
         .. |  ..   |  ..   | ....

    To:
        classe | A | B | C | D
           A   |   |   |   |
           B   |   |   |   |
           C   |   |   |   |
           D   |   |   |   |

    cola = rows; colb = cols. Cell values are the summed geometry area
    in km2 (rounded to 3 decimals; 0 for missing combinations).

    NOTE(review): uses PostgreSQL crosstab — requires the tablefunc
    extension to be installed in the database; confirm.
    """
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.fm import q_to_obj

    # All class codes appearing in either column (as text, sorted) —
    # these become the matrix columns
    ref_val = q_to_obj(db, (
        "SELECT fcol FROM ("
        "SELECT CAST({map1_cls} AS text) AS fcol FROM {tbl} "
        "GROUP BY {map1_cls} "
        "UNION ALL "
        "SELECT CAST({map2_cls} AS text) AS fcol FROM {tbl} "
        "GROUP BY {map2_cls}"
        ") AS foo GROUP BY fcol ORDER BY fcol"
    ).format(
        tbl=tbl, map1_cls=cola, map2_cls=colb,
    ), db_api='psql').fcol.tolist()

    # crosstab pivots (cls_a, cls_b, area) rows into the matrix; the
    # FULL JOIN with the cartesian product of both class lists fills
    # in 0-area cells for combinations absent from the data
    mtx_tbl = q_to_ntbl(db, result, (
        "SELECT * FROM crosstab('"
        "SELECT CASE "
        "WHEN foo.{map1_cls} IS NOT NULL "
        "THEN foo.{map1_cls} ELSE jtbl.flyr "
        "END AS lulc1_cls, CASE "
        "WHEN foo.{map2_cls} IS NOT NULL "
        "THEN foo.{map2_cls} ELSE jtbl.slyr "
        "END AS lulc2_cls, CASE "
        "WHEN foo.garea IS NOT NULL "
        "THEN round(CAST(foo.garea / 1000000 AS numeric)"
        ", 3) ELSE 0 "
        "END AS garea FROM ("
        "SELECT CAST({map1_cls} AS text) AS {map1_cls}, "
        "CAST({map2_cls} AS text) AS {map2_cls}, "
        "SUM(ST_Area(geom)) AS garea "
        "FROM {tbl} GROUP BY {map1_cls}, {map2_cls}"
        ") AS foo FULL JOIN ("
        "SELECT f.flyr, s.slyr FROM ("
        "SELECT CAST({map1_cls} AS text) AS flyr "
        "FROM {tbl} GROUP BY {map1_cls}"
        ") AS f, ("
        "SELECT CAST({map2_cls} AS text) AS slyr "
        "FROM {tbl} GROUP BY {map2_cls}"
        ") AS s"
        ") AS jtbl "
        "ON foo.{map1_cls} = jtbl.flyr AND "
        "foo.{map2_cls} = jtbl.slyr "
        "ORDER BY 1,2"
        "') AS ct("
        "lulc_cls text, {crossCols}"
        ")"
    ).format(
        crossCols=", ".join(
            ["cls_{} numeric".format(c) for c in ref_val]),
        tbl=tbl, map1_cls=cola, map2_cls=colb
    ), api='psql')

    return mtx_tbl
def dbtbl_to_shp(db, tbl, geom_col, outShp, where=None, inDB='psql',
                 notTable=None, filterByReg=None, outShpIsGRASS=None,
                 tableIsQuery=None, api='psql', epsg=None):
    """
    Export a database table (or query) to a Feature Class file.

    db           - database name (psql) or path to file database (sqlite)
    tbl          - table name, or a SQL query when tableIsQuery is True
    geom_col     - name of the geometry column
    outShp       - output path, or GRASS vector name when outShpIsGRASS
    where        - optional WHERE clause (GRASS path only)
    inDB options (GRASS path only):
    * psql
    * sqlite

    api options (non-GRASS path):
    * psql
    * sqlite
    * pgsql2shp

    If outShpIsGRASS is true, the method assumes outShp is a GRASS Vector
    name. That implies that a GRASS Session has been started already.

    Returns outShp.

    Raises ValueError for an unknown inDB (GRASS path) or api value.
    """

    if outShpIsGRASS:
        from gasp import exec_cmd
        from gasp.cons.psql import con_psql

        db_con = con_psql()

        whr = "" if not where else " where=\"{}\"".format(where)

        if inDB == 'psql':
            cmd_str = (
                "v.in.ogr input=\"PG:host={} dbname={} user={} password={} "
                "port={}\" output={} layer={} geometry={}{}{}{} -o --overwrite --quiet"
            ).format(
                db_con["HOST"], db, db_con["USER"], db_con["PASSWORD"],
                db_con["PORT"], outShp, tbl, geom_col, whr,
                " -t" if notTable else "",
                " -r" if filterByReg else ""
            )
        elif inDB == 'sqlite':
            cmd_str = (
                "v.in.ogr -o input={} layer={} output={}{}{}{}"
            ).format(
                db, tbl, outShp, whr,
                " -t" if notTable else "",
                " -r" if filterByReg else ""
            )
        else:
            # Previously cmd_str silently became None here and exec_cmd
            # failed with an obscure error; fail fast instead.
            raise ValueError("inDB value must be 'psql' or 'sqlite'")

        exec_cmd(cmd_str)
    else:
        if api == 'pgsql2shp':
            from gasp import exec_cmd
            from gasp.cons.psql import con_psql

            db_con = con_psql()

            exec_cmd((
                'pgsql2shp -f {out} -h {hst} -u {usr} -p {pt} -P {pas}{geom} '
                '{bd} {t}'
            ).format(
                hst=db_con['HOST'], usr=db_con["USER"], pt=db_con["PORT"],
                pas=db_con['PASSWORD'], bd=db, out=outShp,
                t=tbl if not tableIsQuery else '"{}"'.format(tbl),
                geom="" if not geom_col else " -g {}".format(geom_col)
            ))

        elif api == 'psql' or api == 'sqlite':
            # Import here: only this branch needs the pandas-based helpers.
            from gasp.gt.toshp import df_to_shp
            from gasp.sql.fm import q_to_obj

            q = "SELECT * FROM {}".format(tbl) if not tableIsQuery else tbl

            df = q_to_obj(db, q, db_api=api, geomCol=geom_col, epsg=epsg)

            df_to_shp(df, outShp)

        else:
            raise ValueError((
                'api value must be \'psql\', \'sqlite\' or \'pgsql2shp\''))

    return outShp
def ID_rows_with_temporal_proximity_by_entities(db, table, entity_field,
                                                day_field, hour_field,
                                                hour_decimal_field,
                                                time_tolerance, outXlsPath):
    """
    Retrieve rows from one pgtable with some temporal proximity.

    Table structure should be:
        entity | day        | hour | hour_decimal
        0      | 2018-01-02 | 5    | 5,10
        0      | 2018-01-03 | 4    | 4,15
        0      | 2018-01-02 | 5    | 5,12
        1      | 2018-01-02 | 4    | 4,10
        ...

    For a time_tolerance of 5 minutes, the output table flags (column
    'time_difference' = 1) the entity/day/hour groups that contain at
    least two rows whose hour_decimal values differ by less than the
    tolerance.

    entity_field may be a single field name or a list of field names.

    This method only identifies if one entity, for one day, has rows very
    close to each other in terms of time.

    Not a good strategy for large tables: it issues one query per
    candidate group. For large tables, SQL-based methods are needed.

    Returns outXlsPath.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import cols_type
    from gasp.to import obj_to_tbl

    entity_field = obj_to_lst(entity_field)
    COLS = entity_field + [day_field, hour_field]
    COLS_TYPE = cols_type(db, table)

    # TIME TOLERANCE IN HOURS
    TIME_TOLERANCE = time_tolerance / 60.0

    def thereIsRowsSameTimeInt(row):
        """Flag row with time_difference=1 if any two of its group's
        hour_decimal values are closer than TIME_TOLERANCE."""
        whr = []
        for c in COLS:
            if COLS_TYPE[c] == str:
                whr.append("{}='{}'".format(c, row[c]))
            else:
                whr.append("{}={}".format(c, row[c]))

        hourRows = q_to_obj(db,
            "SELECT {} FROM {} WHERE {}".format(
                hour_decimal_field, table, " AND ".join(whr)
            ), db_api='psql'
        )[hour_decimal_field].tolist()

        # Series.tolist() yields scalar hour values; the original code
        # indexed hourRows[i][0], which would raise TypeError on scalars.
        # Also initialise the flag first so groups with fewer than two
        # rows no longer hit an unbound 'dif' variable.
        row['time_difference'] = 0
        for i in range(len(hourRows)):
            for e in range(i + 1, len(hourRows)):
                if abs(hourRows[i] - hourRows[e]) < TIME_TOLERANCE:
                    row['time_difference'] = 1
                    break
            if row['time_difference']:
                break

        return row

    # Count entity occourrences for one day and hour
    countsByEntityTime = q_to_obj(db, (
        "SELECT {scols}, conta FROM "
        "(SELECT {scols}, COUNT({ent}) AS conta FROM {tbl} "
        "GROUP BY {scols}) AS foo WHERE conta > 1"
    ).format(
        scols=', '.join(COLS), ent=entity_field[0], tbl=table
    ), db_api='psql')

    # For each row in the last count (all have count > 1),
    # check time difference between rows for one day and hour
    countsByEntityTime = countsByEntityTime.apply(
        thereIsRowsSameTimeInt, axis=1)

    obj_to_tbl(countsByEntityTime, outXlsPath)

    return outXlsPath
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, DB,
                     GRASS_REGION_TEMPLATE):
    """
    Check differences between pairs of Feature Classes.

    Given several Feature Classes (or Rasters, converted on the fly),
    each with an attribute of interest, compare — for every possible
    pair — how the distribution of that attribute's values differs,
    and export agreement percentages, areas and confusion matrices
    to a report file (REPORT).

    SHAPES_TO_COMPARE      - dict mapping file path -> attribute column name
    OUT_FOLDER             - output folder (must NOT exist; created here)
    REPORT                 - path of the report (spreadsheet) to produce
    DB                     - name of the PostgreSQL database to create/use
    GRASS_REGION_TEMPLATE  - file defining the GRASS location/region

    Dependencies: GRASS, PostgreSQL, PostGIS.

    Returns REPORT.
    """

    import datetime
    import os
    import pandas
    from gasp.sql.fm import q_to_obj
    from gasp.to import db_to_tbl
    from gasp.sql.to import df_to_db
    from gasp.gt.toshp.rst import rst_to_polyg
    from gasp.gql.to import shp_to_psql
    from gasp.gql.tomtx import tbl_to_area_mtx
    from gasp.gt.prop.ff import check_isRaster
    from gasp.pyt.oss import fprop
    from gasp.sql.db import create_db
    from gasp.sql.tbl import tbls_to_tbl
    from gasp.sql.to import q_to_ntbl
    # NOTE(review): optimized_union_anls was called below but never
    # imported anywhere in this module (NameError at runtime).
    # TODO: confirm this module path against the gasp package layout.
    from gasp.gt.gop.ovlay import optimized_union_anls

    # Check if folder exists; if not create it
    if not os.path.exists(OUT_FOLDER):
        from gasp.pyt.oss import mkdir
        mkdir(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    # Start GRASS GIS Session
    from gasp.gt.wenv.grs import run_grass
    gbase = run_grass(
        OUT_FOLDER, grassBIN='grass78', location='shpdif',
        srs=GRASS_REGION_TEMPLATE)

    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

    from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp
    from gasp.gt.torst import rst_to_grs
    from gasp.gt.tbl.fld import rn_cols

    # Convert to SHAPE if file is Raster
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        if check_isRaster(s):
            # To GRASS
            rstName = fprop(s, 'fn')
            inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
            # To Vector
            d = rst_to_polyg(
                inRst, rstName, rstColumn="lulc_{}".format(i),
                gisApi="grass")
            # Export Shapefile
            shp = grs_to_shp(
                d, os.path.join(OUT_FOLDER, d + '.shp'), "area")
            _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)
        else:
            # To GRASS
            grsV = shp_to_grs(s, fprop(s, 'fn'), asCMD=True)
            # Rename the column holding the comparison value
            rn_cols(
                grsV, {SHAPES_TO_COMPARE[s]: "lulc_{}".format(str(i))},
                api="grass")
            # Export
            shp = grs_to_shp(
                grsV, os.path.join(OUT_FOLDER, grsV + '_rn.shp'), "area")
            _SHP_TO_COMPARE[shp] = "lulc_{}".format(str(i))

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE
    __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database
    create_db(DB, api='psql')

    # Union SHAPEs — one union per unordered pair of inputs
    UNION_SHAPE = {}
    SHPS = list(__SHAPES_TO_COMPARE.keys())
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            # Optimized Union
            print("Union between {} and {}".format(SHPS[i], SHPS[e]))
            time_a = datetime.datetime.now().replace(microsecond=0)
            __unShp = optimized_union_anls(
                SHPS[i], SHPS[e],
                os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                GRASS_REGION_TEMPLATE,
                os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                multiProcess=True)
            time_b = datetime.datetime.now().replace(microsecond=0)
            print(time_b - time_a)

            # Rename cols: drop the a_/b_ prefixes the union step adds
            unShp = rn_cols(__unShp, {
                "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                    __SHAPES_TO_COMPARE[SHPS[i]],
                "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                    __SHAPES_TO_COMPARE[SHPS[e]]
            })

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data to postgresql
    SYNTH_TBL = {}
    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(DB, UNION_SHAPE[uShp], api='shp2pgsql')

        # Produce table with % of area equal in both maps
        areaMapTbl = q_to_ntbl(DB, "{}_syn".format(union_tbl), (
            "SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
            "CAST('{lulc_2}' AS text) AS lulc_2, "
            "round("
                "CAST(SUM(g_area) / 1000000 AS numeric), 4"
            ") AS agree_area, round("
                "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
            ") AS agree_percentage, "
            "round("
                "CAST(MIN(total_area) / 1000000 AS numeric), 4"
            ") AS total_area FROM ("
                "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
                "CASE "
                    "WHEN {map1_cls} = {map2_cls} "
                    "THEN 1 ELSE 0 "
                "END AS isthesame, total_area FROM {tbl}, ("
                    "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
                ") AS foo2"
            ") AS foo WHERE isthesame = 1 "
            "GROUP BY isthesame"
        ).format(
            lulc_1=fprop(uShp[0], 'fn'), lulc_2=fprop(uShp[1], 'fn'),
            map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
            map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
            tbl=union_tbl
        ), api='psql')

        # Produce confusion matrix for the pair in comparison
        matrixTbl = tbl_to_area_mtx(
            DB, union_tbl,
            __SHAPES_TO_COMPARE[uShp[0]],
            __SHAPES_TO_COMPARE[uShp[1]],
            union_tbl + '_mtx')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(
        DB, [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL], 'total_table')

    # Create table with % of agreement between each pair of maps
    mapsNames = q_to_obj(DB, (
        "SELECT lulc FROM ("
            "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
            "UNION ALL "
            "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
        ") AS lu GROUP BY lulc ORDER BY lulc"
    ).format(tbl=total_table), db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    Q = (
        "SELECT * FROM crosstab('"
            "SELECT CASE "
                "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
            "END AS lulc_1, CASE "
                "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
            "END AS lulc_2, CASE "
                "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
            "END AS agree_percentage FROM ("
                "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
                "SELECT lulc_1, lulc_2, {valCol} FROM ("
                    "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
                    "FROM {tbl}"
                ") AS tst"
            ") AS foo FULL JOIN ("
                "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
                    "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
                    "UNION ALL "
                    "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
                ") AS tst_1, ("
                    "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
                    "UNION ALL "
                    "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
                ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
            ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
            "ORDER BY lulc_1, lulc_2"
        "') AS ct("
            "lulc_map text, {crossCols}"
        ")"
    )

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = q_to_ntbl(
                DB, "agreement_table",
                Q.format(
                    tbl=total_table, valCol=f,
                    crossCols=", ".join([
                        "{} numeric".format(map_) for map_ in mapsNames])
                ), api='psql')
        else:
            TOTAL_AREA_TABLE = q_to_ntbl(
                DB, "area_table",
                Q.format(
                    tbl=total_table, valCol=f,
                    crossCols=", ".join([
                        "{} numeric".format(map_) for map_ in mapsNames])
                ), api='psql')

    # Union Mapping
    UNION_MAPPING = pandas.DataFrame(
        [[k[0], k[1], fprop(UNION_SHAPE[k], 'fn')] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(DB, UNION_MAPPING, 'union_map', api='psql')

    # Export Results
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE] + [
        SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(
            fprop(x[0], 'fn')[:15], fprop(x[1], 'fn')[:15]
        ) for x in SYNTH_TBL]

    db_to_tbl(
        DB, ["SELECT * FROM {}".format(x) for x in TABLES],
        REPORT, sheetsNames=SHEETS, dbAPI='psql')

    return REPORT