def copy_fromdb_todb(conFromDb, conToDb, tables, qForTbl=None):
    """
    Send PGSQL Tables from one database to other.
    
    conFromDb / conToDb : psql connection parameters for source and target.
    tables  : table name or list of table names to copy.
    qForTbl : optional dict {table_name: query}; when a table has an entry,
              that query is used to fetch its data instead of a plain SELECT.
    """
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import cols_name
    from gasp.to.sql import df_to_db
    
    # NOTE: removed an unused "import pandas" from the original
    tables = goToList(tables)
    
    for table in tables:
        cols = cols_name(conFromDb, table)
        
        # The original duplicated the default-query branch in a nested
        # if/else; both "no qForTbl" and "table not in qForTbl" take the
        # same path, so test them together
        if qForTbl and table in qForTbl:
            tblDf = query_to_df(conFromDb, qForTbl[table], db_api='psql')
        else:
            tblDf = query_to_df(conFromDb, "SELECT {} FROM {}".format(
                ", ".join(cols), table), db_api='psql')
        
        df_to_db(conToDb, tblDf, table, api='psql')
def copy_fromdb_todb(conFromDb, conToDb, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL Tables from one database to other.
    
    api : 'pandas' (round-trip through DataFrames) or 'psql'
          (pg_dump/pg_restore scripts). Any other value falls back
          to 'pandas'.
    """
    from gasp import goToList
    
    # Unknown engine names silently fall back to the pandas path
    if api not in ('pandas', 'psql'):
        api = 'pandas'
    
    tables = goToList(tables)
    
    if api == 'pandas':
        from gasp.fm.sql import query_to_df
        from gasp.to.sql import df_to_db
        
        for table in tables:
            # Prefer a caller-supplied query for this table, if any
            if qForTbl and table in qForTbl:
                q = qForTbl[table]
            else:
                q = "SELECT * FROM {}".format(table)
            
            tblDf = query_to_df(conFromDb, q, db_api='psql')
            df_to_db(conToDb, tblDf, table, api='psql')
    
    else:
        import os
        from gasp.oss.ops import create_folder, del_folder
        from gasp.sql.mng.tbl import dump_table
        from gasp.sql.mng.tbl import restore_table
        
        # Work through SQL scripts written to a throw-away folder
        tmpFolder = create_folder(
            os.path.dirname(os.path.abspath(__file__)), randName=True)
        
        for table in tables:
            # Dump
            sqlScript = dump_table(
                conFromDb, table, os.path.join(tmpFolder, table + ".sql"))
            
            # Restore
            tblname = restore_table(conToDb, sqlScript, table)
        
        del_folder(tmpFolder)
def get_osm_feat_by_rule(nomenclature):
    """
    Return OSM features joined with their LULC class, rule name and
    rule parameters (buffer/area) for the given nomenclature.
    
    Reads identifiers from the module-level DB_SCHEMA mapping and runs
    against the module-level PROCEDURE_DB (sqlite).
    """
    from gasp.fm.sql import query_to_df
    
    # Resolve every table/column identifier from the schema mapping once
    ids = {
        'osmfeat'    : DB_SCHEMA["OSM_FEATURES"]["NAME"],
        'osmid'      : DB_SCHEMA["OSM_FEATURES"]["OSM_ID"],
        'key'        : DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        'val'        : DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"],
        'osmrel'     : DB_SCHEMA[nomenclature]["OSM_RELATION"],
        'relosmid'   : DB_SCHEMA[nomenclature]["OSM_FK"],
        'rellulccls' : DB_SCHEMA[nomenclature]["CLS_FK"],
        '_ruleID'    : DB_SCHEMA[nomenclature]["RULE_FK"],
        'rules'      : DB_SCHEMA["RULES"]["NAME"],
        'ruleID'     : DB_SCHEMA["RULES"]["RULE_ID"],
        'ruleName'   : DB_SCHEMA["RULES"]["RULE_NAME"],
        'bufferCol'  : DB_SCHEMA[nomenclature]["RULES_FIELDS"]["BUFFER"],
        'areaCol'    : DB_SCHEMA[nomenclature]["RULES_FIELDS"]["AREA"]
    }
    
    Q = (
        "SELECT jtbl.{rellulccls}, {osmfeat}.{key}, {osmfeat}.{val}, "
        "jtbl.{ruleName}, jtbl.{bufferCol}, jtbl.{areaCol} "
        "FROM {osmfeat} INNER JOIN ("
        "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
        "{rules}.{ruleID}, {rules}.{ruleName}, "
        "{osmrel}.{bufferCol}, {osmrel}.{areaCol} "
        "FROM {osmrel} "
        "INNER JOIN {rules} ON {osmrel}.{_ruleID} = {rules}.{ruleID} "
        ") AS jtbl ON {osmfeat}.{osmid} = jtbl.{relosmid}"
    ).format(**ids)
    
    return query_to_df(PROCEDURE_DB, Q, db_api='sqlite')
def bar_chart_by_query(conParam, query, xaxis_col, chart_data_fld_name,
                       series, outxls):
    """
    Execute some query for each serie in series.
    Produce a bar chart with the data returned.
    Each serie will be new bar in the chart.
    
    So, the query statment must have a white space to
    put each serie in series... E.g.:
    "SELECT fld_name FROM t WHERE series_fld={}".format(serie in series)
    """
    from gasp.fm.sql import query_to_df
    from gasp.mng.joins import combine_dfs
    from gasp.mng.xlstbx.charts import barchart_in_xls_from_df
    
    # One DataFrame per serie, with the data column renamed to the serie
    frames = []
    for serie in series:
        frame = query_to_df(conParam, query.format(serie), db_api='psql')
        frame.rename(columns={chart_data_fld_name: serie}, inplace=True)
        frames.append(frame)
    
    # Join everything on the x-axis column and draw the chart
    main_df = combine_dfs(frames[0], frames[1:], xaxis_col)
    
    return barchart_in_xls_from_df(main_df, outxls, series, xaxis_col)
def thereIsRowsSameTimeInt(row):
    """
    Set row['time_difference'] to 1 when at least two rows matching this
    row's COLS values have decimal hours closer than TIME_TOLERANCE,
    otherwise 0; return the (mutated) row.
    
    Relies on enclosing/module-scope names: COLS, COLS_TYPE, conParam,
    table, hour_decimal_field, TIME_TOLERANCE and query_to_df — TODO
    confirm against the defining scope.
    """
    whr = []
    for c in COLS:
        # Quote the value only for text columns
        if COLS_TYPE[c] == str:
            whr.append("{}='{}'".format(c, row[c]))
        else:
            whr.append("{}={}".format(c, row[c]))
    
    hourRows = query_to_df(conParam,
        "SELECT {} FROM {} WHERE {}".format(
            hour_decimal_field, table, " AND ".join(whr)),
        db_api='psql'
    )[hour_decimal_field].tolist()
    
    # BUGFIX: the original read "dif" after the loops, which is unbound
    # when fewer than two rows match; track proximity with a flag instead
    is_close = 0
    for i in range(len(hourRows)):
        for e in range(i + 1, len(hourRows)):
            if abs(hourRows[i][0] - hourRows[e][0]) < TIME_TOLERANCE:
                is_close = 1
                break
        if is_close:
            break
    
    row['time_difference'] = is_close
    
    return row
def split_table_by_col_distinct(conParam, pgtable, column):
    """
    Create a new table for each value in one column.
    
    conParam : psql connection parameters.
    pgtable  : source table name.
    column   : column whose distinct values drive the split.
    """
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import get_columns_type
    from gasp.sql.mng.qw import ntbl_by_query
    
    fields_types = get_columns_type(conParam, pgtable)
    
    # Get unique values
    # BUGFIX: the original referenced an undefined name "interest_column"
    # everywhere below (NameError); the "column" parameter is what was meant
    VALUES = query_to_df(conParam,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=column, t=pgtable),
        db_api='psql')[column].tolist()
    
    # Quote the value in the WHERE clause only for text columns
    whr = '{}=\'{}\'' if fields_types[column] == str else '{}={}'
    
    for row in VALUES:
        # NOTE(review): row[0] assumes each list entry is indexable;
        # tolist() on a single column normally yields scalars — verify
        # against query_to_df's return shape
        ntbl_by_query(conParam,
            '{}_{}'.format(pgtable, str(row[0])),
            "SELECT * FROM {} WHERE {}".format(
                pgtable, whr.format(column, str(row[0]))),
            api='psql')
def osm_features_by_rule(nomenclature, rule):
    """
    Return the OSM features associated with one OSM2LULC rule, joined
    with their LULC class and, for area/buffer rules, the rule parameter
    column. Reads identifiers from the module-level DB_SCHEMA mapping and
    queries the module-level PROCEDURE_DB (sqlite).
    """
    from gasp.fm.sql import query_to_df
    
    COLS = [
        DB_SCHEMA[nomenclature]["CLS_FK"],
        DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"]
    ]
    
    # Area rules carry the AREA threshold column, buffer rules the
    # BUFFER distance column; every other rule carries no extra column
    if rule in ('area_upper', 'area_lower'):
        extraCol = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["AREA"]
    elif rule in ('roads', 'basic_buffer'):
        extraCol = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["BUFFER"]
    else:
        extraCol = None
    
    if extraCol:
        rule_info_field = ", {}.{}".format(
            DB_SCHEMA[nomenclature]["OSM_RELATION"], extraCol)
        rule_info_field_ = ", osm_cls_rule.{}".format(extraCol)
        COLS.append(extraCol)
    else:
        rule_info_field = ""
        rule_info_field_ = ""
    
    QUERY = (
        "SELECT osm_cls_rule.{rellulccls}, {osmfeat}.{key}, "
        "{osmfeat}.{val}{rsupfield_} FROM {osmfeat} INNER JOIN ("
        "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
        "{rules}.{_rule_id}, {rules}.{rule_name}{rsupfield} "
        "FROM {osmrel} "
        "INNER JOIN {rules} ON {osmrel}.{rule_id} = {rules}.{_rule_id} "
        "WHERE {rules}.{rule_name} = '{rule_in_processing}'"
        ") AS osm_cls_rule ON {osmfeat}.{osmid} = osm_cls_rule.{relosmid}"
    ).format(
        osmfeat=DB_SCHEMA["OSM_FEATURES"]["NAME"],
        osmid=DB_SCHEMA["OSM_FEATURES"]["OSM_ID"],
        key=DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        val=DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"],
        osmrel=DB_SCHEMA[nomenclature]["OSM_RELATION"],
        relosmid=DB_SCHEMA[nomenclature]["OSM_FK"],
        rellulccls=DB_SCHEMA[nomenclature]["CLS_FK"],
        rule_id=DB_SCHEMA[nomenclature]["RULE_FK"],
        rules=DB_SCHEMA["RULES"]["NAME"],
        _rule_id=DB_SCHEMA["RULES"]["RULE_ID"],
        rule_name=DB_SCHEMA["RULES"]["RULE_NAME"],
        rule_in_processing=rule,
        rsupfield=rule_info_field,
        rsupfield_=rule_info_field_)
    
    return query_to_df(PROCEDURE_DB, QUERY, db_api='sqlite')
def rst_area(osmLink, polygonTable, UPPER=True, api='SQLITE'):
    """
    Select features with area upper than.
    
    A field with threshold is needed in the database.
    
    osmLink      : database connection (psql params or sqlite path).
    polygonTable : polygon table holding, per rule, a threshold column
                   named "t_<rule>" (e.g. t_area_upper).
    UPPER        : True -> 'area_upper' rule, compare with ">";
                   False -> 'area_lower' rule, compare with "<".
    api          : 'POSTGIS' selects the psql backend; anything else
                   uses the SQLITE backend.
    
    Returns (clsRst, timeGasto): {lulc_class_int: grass_raster_name} and
    a {step_index: (step_label, timedelta)} timing log.
    """
    import datetime
    from gasp.fm.sql import query_to_df
    
    # Pick the DB-to-GRASS importer matching the database API
    if api == 'POSTGIS':
        from gasp.to.shp.grs import psql_to_grs as db_to_grs
    else:
        from gasp.to.shp.grs import sqlite_to_shp as db_to_grs
    from gasp.to.rst import shp_to_raster
    from gasp.osm2lulc.var import GEOM_AREA
    
    RULE_COL = 'area_upper' if UPPER else 'area_lower'
    OPERATOR = " > " if UPPER else " < "
    
    # WHERE template: geometry area compared against the per-row
    # threshold column t_<rule>, restricted to one LULC class
    WHR = "{ga} {op} t_{r} AND {r}={cls_}"
    
    # Get Classes satisfying the area condition
    time_a = datetime.datetime.now().replace(microsecond=0)
    lulcCls = query_to_df(osmLink, (
        "SELECT {r} FROM {tbl} WHERE {ga} {op} t_{r} GROUP BY {r}"
    ).format(
        r=RULE_COL, tbl=polygonTable, ga=GEOM_AREA, op=OPERATOR
    ), db_api='psql' if api == 'POSTGIS' else 'sqlite')[RULE_COL].tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)
    timeGasto = {0: ('check_cls', time_b - time_a)}
    
    # Import data into GRASS and convert it to raster
    clsRst = {}
    tk = 1  # timing-log key; two entries (import + rasterize) per class
    for cls in lulcCls:
        time_x = datetime.datetime.now().replace(microsecond=0)
        grsVect = db_to_grs(
            osmLink, polygonTable, "{}_{}".format(RULE_COL, cls),
            where=WHR.format(
                op=OPERATOR, r=RULE_COL, ga=GEOM_AREA, cls_=cls
            ), notTable=True, filterByReg=True
        )
        time_y = datetime.datetime.now().replace(microsecond=0)
        timeGasto[tk] = ('import_{}'.format(cls), time_y - time_x)
        
        # NOTE(review): output raster name "rst_<rule>" is identical for
        # every class in the loop — confirm shp_to_raster handles the
        # collision (overwrite or per-mapset uniqueness)
        grsRst = shp_to_raster(
            grsVect, int(cls), None, None,
            "rst_{}".format(RULE_COL), api='grass'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)
        timeGasto[tk + 1] = ('torst_{}'.format(cls), time_z - time_y)
        
        clsRst[int(cls)] = grsRst
        tk += 2
    
    return clsRst, timeGasto
def split_table_entity_number(conP, table, entity_field, entity_number):
    """
    Split tables in several using as reference a number of entities per table
    
    If a table has 1 000 000 entities and the entity_number is 250 000,
    this method will create four tables, each one with 250 000 entities.
    250 000 entities, not rows. Don't forget that the main table may have
    more than one reference to the same entity.
    """
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import get_columns_type
    from gasp.sql.mng.qw import ntbl_by_query
    
    # NOTE: removed an unused "import pandas" from the original
    
    # Select entities in table
    entities = query_to_df(conP, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table), db_api='psql')
    
    # Split entities into groups acoording entity_number
    # BUGFIX: the original loop used "<=", which appended a trailing EMPTY
    # group (and later produced a malformed, WHERE-less query) whenever the
    # entity count was an exact multiple of entity_number (or zero)
    entityGroup = []
    lower, high = 0, entity_number
    while lower < len(entities.index):
        if high > len(entities.index):
            high = len(entities.index)
        entityGroup.append(entities.iloc[lower:high])
        lower += entity_number
        high += entity_number
    
    # For each dataframe, create a new table
    COLS_TYPE = get_columns_type(conP, table)
    
    for c, df in enumerate(entityGroup):
        # Build "field=value" terms; text entities get quoted values
        if COLS_TYPE[entity_field] != str:
            df[entity_field] = '{}='.format(
                entity_field) + df[entity_field].astype(str)
        else:
            df[entity_field] = '{}=\''.format(
                entity_field) + df[entity_field].astype(str) + '\''
        
        whr = ' OR '.join(df[entity_field])
        
        ntbl_by_query(conP, '{}_{}'.format(table, str(c)),
            "SELECT * FROM {} WHERE {}".format(table, whr), api='psql')
def get_keys():
    """
    Return available keys for this API
    """
    from gasp.fm.sql import query_to_df
    
    # Full key table, ordered by primary key
    Q = "SELECT fid, key, date, nrqst FROM google ORDER BY fid"
    
    return query_to_df(APIS_DB, Q, db_api='sqlite')
def grs_rst(osmLink, polyTbl, api='SQLITE'):
    """
    Simple selection, convert result to Raster
    """
    import datetime
    from gasp.fm.sql import query_to_df
    
    # DB-to-GRASS importer depends on the database backend
    if api == 'POSTGIS':
        from gasp.to.shp.grs import psql_to_grs as db_to_grs
    else:
        from gasp.to.shp.grs import sqlite_to_shp as db_to_grs
    from gasp.to.rst import shp_to_raster
    
    dbapi = 'psql' if api == 'POSTGIS' else 'sqlite'
    
    # Which LULC classes were selected by rule 1?
    t0 = datetime.datetime.now().replace(microsecond=0)
    lulcCls = query_to_df(osmLink, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(polyTbl), db_api=dbapi).selection.tolist()
    t1 = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', t1 - t0)}
    
    # Import each class into GRASS and rasterize it
    clsRst = {}
    for idx, cls in enumerate(lulcCls):
        tx = datetime.datetime.now().replace(microsecond=0)
        vect = db_to_grs(osmLink, polyTbl,
                         "rule1_{}".format(str(cls)),
                         where="selection = {}".format(str(cls)),
                         notTable=True, filterByReg=True)
        ty = datetime.datetime.now().replace(microsecond=0)
        
        rst = shp_to_raster(vect, int(cls), None, None,
                            "rst_rule1_{}".format(str(cls)), api='grass')
        tz = datetime.datetime.now().replace(microsecond=0)
        
        clsRst[int(cls)] = rst
        # Two timing entries per class: odd key = import, even = rasterize
        timeGasto[idx * 2 + 1] = ('import_{}'.format(cls), ty - tx)
        timeGasto[idx * 2 + 2] = ('torst_{}'.format(cls), tz - ty)
    
    return clsRst, timeGasto
def lst_views(conParam, schema='public'):
    """
    List Views in database
    """
    from gasp.fm.sql import query_to_df
    
    Q = (
        "SELECT table_name FROM information_schema.views "
        "WHERE table_schema='{}'"
    ).format(schema)
    
    return query_to_df(conParam, Q, db_api='psql').table_name.tolist()
def build12_torst(buildTbl):
    """
    Export each LULC class found in buildTbl to a shapefile and
    rasterize it.
    
    Closure: relies on enclosing-scope names osmLink, apidb, folder,
    cells, srscode, rstT, resLyr, timeGasto plus datetime/os and the
    sel_by_attr / shp_to_raster / query_to_df helpers — TODO confirm
    against the enclosing function, which is not visible here.
    """
    # Distinct LULC classes present in the buildings table
    LulcCls = query_to_df(
        osmLink, "SELECT cls FROM {} GROUP BY cls".format(buildTbl),
        db_api='psql' if apidb == 'POSTGIS' else 'sqlite').cls.tolist()
    
    for lulc_cls in LulcCls:
        time_m = datetime.datetime.now().replace(microsecond=0)
        
        # To SHP -- exporter backend depends on the database API in use
        if apidb == 'SQLITE':
            shpB = sel_by_attr(
                osmLink,
                "SELECT * FROM {} WHERE cls={}".format(
                    buildTbl, str(lulc_cls)),
                os.path.join(folder, 'nshp_build_{}.shp'.format(lulc_cls)),
                api_gis='ogr')
        else:
            shpB = sel_by_attr(
                osmLink,
                "SELECT * FROM {} WHERE cls={}".format(
                    buildTbl, str(lulc_cls)),
                os.path.join(folder, 'nshp_build_{}.shp'.format(lulc_cls)),
                api='pgsql2shp', geom_col="geometry", tableIsQuery=True)
        
        time_n = datetime.datetime.now().replace(microsecond=0)
        
        # To RST (GDAL, using the template raster/SRS from the closure)
        brst = shp_to_raster(
            shpB, None, cells, 0,
            os.path.join(folder, 'nrst_build_{}.tif'.format(lulc_cls)),
            srscode, rstT, api='gdal')
        
        time_o = datetime.datetime.now().replace(microsecond=0)
        
        resLyr[int(lulc_cls)] = [brst]
        
        # NOTE(review): timing keys int(cls) and int(cls)+1 can collide
        # between consecutive class codes — verify this is intended
        timeGasto[int(lulc_cls)] = ('to_shp_{}'.format(str(lulc_cls)),
                                    time_n - time_m)
        timeGasto[int(lulc_cls) + 1] = ('to_rst_n_{}'.format(
            str(lulc_cls)), time_o - time_n)
def get_distinct_values(lnk, pgtable, column):
    """
    Get distinct values in one column of one pgtable
    """
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    
    # One or several columns: GROUP BY on all of them
    cols = ", ".join(goToList(column))
    Q = "SELECT {col} FROM {t} GROUP BY {col};".format(col=cols, t=pgtable)
    
    return query_to_df(lnk, Q, db_api='psql').to_dict(orient="records")
def check_geomtype_in_table(conParam, table, geomCol='geom'):
    """
    Return the number of geometry types in table
    """
    from gasp.fm.sql import query_to_df
    
    # Count how many distinct geometry types the (dumped) geometries have
    Q = (
        "SELECT COUNT(*) AS row_count FROM ("
        "SELECT ST_GeometryType((ST_Dump({})).geom) AS cnt_geom "
        "FROM {} GROUP BY ST_GeometryType((ST_Dump({})).geom)"
        ") AS foo"
    ).format(geomCol, table, geomCol)
    
    d = query_to_df(conParam, Q, db_api='psql')
    
    return int(d.iloc[0].row_count)
def lst_tbl(conObj, schema='public', excludeViews=None, api='psql'):
    """
    list tables in a database
    
    API's Available:
    * psql;
    * sqlite;
    """
    if api == 'psql':
        from gasp.fm.sql import query_to_df
        
        tbls = query_to_df(conObj, (
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema='{}'"
        ).format(schema), db_api='psql')
        
        names = tbls.table_name.tolist()
        
        if excludeViews:
            # Drop names that are actually views
            viewNames = lst_views(conObj, schema=schema)
            return [t for t in names if t not in viewNames]
        
        return names
    
    elif api == 'sqlite':
        # List tables in one sqliteDB
        import sqlite3
        
        conn = sqlite3.connect(conObj)
        cursor = conn.cursor()
        
        rows = cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table';")
        names = [r[0] for r in rows]
        
        cursor.close()
        conn.close()
        
        return names
    
    else:
        raise ValueError('API {} is not available!'.format(api))
def arcg_selection(db, polTbl, fld):
    """
    Select, Dissolve and Reproject using ArcGIS
    """
    import datetime
    import os
    from gasp.mng.genze import dissolve
    from gasp.fm.sql import query_to_df
    from gasp.anls.exct import sel_by_attr
    
    # Get LULC Classes
    t0 = datetime.datetime.now().replace(microsecond=0)
    lulcCls = query_to_df(db, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL GROUP BY selection"
    ).format(polTbl), db_api='sqlite').selection.tolist()
    t1 = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', t1 - t0)}
    
    # Extract Shps from DB
    clsShp = {}
    SQL = "SELECT selection, geometry FROM {} WHERE selection={}"
    
    for i, cls in enumerate(lulcCls):
        tx = datetime.datetime.now().replace(microsecond=0)
        shp = sel_by_attr(
            db, SQL.format(polTbl, str(cls)),
            os.path.join(fld, 'rule1_{}.shp'.format(cls)), api_gis='ogr')
        ty = datetime.datetime.now().replace(microsecond=0)
        
        dShp = dissolve(
            shp, os.path.join(fld, "rul1_d_{}.shp".format(str(cls))),
            "FID", geomMultiPart=True)
        tz = datetime.datetime.now().replace(microsecond=0)
        
        clsShp[int(cls)] = dShp
        # Two timing entries per class: export then dissolve
        timeGasto[i * 2 + 1] = ("export_{}".format(cls), ty - tx)
        timeGasto[i * 2 + 2] = ("dissolve_{}".format(cls), tz - ty)
    
    return clsShp, timeGasto
def count_by_periods_with_certain_duration(conParam, PERIOD_INTERVAL, pgtable,
                                           TIME_FIELD, outTable,
                                           filterWhere=None):
    """
    Count rows in a pgtable by periods of X minutes
    
    PERIOD_INTERVAL = "01:00:00"
    """
    import pandas
    from gasp import day_to_intervals2
    # BUGFIX: query_to_df and obj_to_tbl were called below but never
    # imported (this file's convention is function-local imports)
    from gasp.fm.sql import query_to_df
    from gasp.to import obj_to_tbl
    
    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)
    
    # For each interval/period, count the number of rows
    counting = None
    for _int_ in INTERVALS:
        QUERY = (
            "SELECT COUNT(*) AS count FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr}"
        ).format(
            table=pgtable, timeCol=TIME_FIELD,
            minLower=_int_[0], minUpper=_int_[1],
            whr="" if not filterWhere else " AND ({})".format(filterWhere)
        )
        
        count = query_to_df(conParam, QUERY, db_api='psql')
        # Index the single-row result by its "HH:MM-HH:MM" period label
        count.rename(index={0: "{}-{}".format(
            _int_[0][:5], _int_[1][:5])}, inplace=True)
        
        if not isinstance(counting, pandas.DataFrame):
            counting = count.copy()
        else:
            # pandas.concat replaces the deprecated DataFrame.append
            counting = pandas.concat([counting, count])
    
    obj_to_tbl(counting, outTable)
    
    return outTable
def row_num(conObj, table, where=None, api='psql'):
    """
    Return the number of rows in Query
    
    API's Available:
    * psql;
    * sqlite;
    """
    from gasp.fm.sql import query_to_df
    
    whr = " WHERE {}".format(where) if where else ""
    
    d = query_to_df(
        conObj, "SELECT COUNT(*) AS nrows FROM {}{}".format(table, whr),
        db_api=api)
    
    return int(d.iloc[0].nrows)
def get_tbl_extent(conParam, table, geomCol):
    """
    Return extent of the geometries in one pgtable
    """
    from gasp.fm.sql import query_to_df
    
    # Dump every vertex, then take the min/max of the coordinates
    q = (
        "SELECT MIN(ST_X(pnt_geom)) AS eleft, MAX(ST_X(pnt_geom)) AS eright, "
        "MIN(ST_Y(pnt_geom)) AS bottom, MAX(ST_Y(pnt_geom)) AS top "
        "FROM ("
        "SELECT (ST_DumpPoints({geomcol})).geom AS pnt_geom "
        "FROM {tbl}"
        ") AS foo"
    ).format(tbl=table, geomcol=geomCol)
    
    ext = query_to_df(conParam, q, db_api='psql').to_dict(orient='index')[0]
    
    # [xmin, ymin, xmax, ymax]
    return [ext[k] for k in ('eleft', 'bottom', 'eright', 'top')]
def count_entity_periods_with_certain_duration(CON_PSQL, PERIOD_INTERVAL,
                                               PGTABLE, TIME_FIELD,
                                               ENTITY_FIELD, OUT_TABLE,
                                               filterWhere=None):
    """
    Count rows in a pgtable for a given period of X minutes for each
    interest entity
    
    PERIOD_INTERVAL = "01:00:00"
    """
    from gasp import day_to_intervals2
    from gasp.mng.joins import combine_dfs
    # BUGFIX: query_to_df and obj_to_tbl were called below but never
    # imported (this file's convention is function-local imports);
    # also removed an unused "import pandas"
    from gasp.fm.sql import query_to_df
    from gasp.to import obj_to_tbl
    
    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)
    
    # For each interval/period, count the number of rows by entity
    counting = []
    for _int in INTERVALS:
        Q = (
            "SELECT {entityCol}, COUNT({entityCol}) AS {cntCol} "
            "FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr} "
            "GROUP BY {entityCol}"
        ).format(
            # Count column named after the period, e.g. s08_00_e09_00
            cntCol="s{}_e{}".format(
                _int[0][:5], _int[1][:5]).replace(":", "_"),
            table=PGTABLE, timeCol=TIME_FIELD, entityCol=ENTITY_FIELD,
            minLower=_int[0], minUpper=_int[1],
            whr="" if not filterWhere else " AND ({}) ".format(filterWhere)
        )
        
        counting.append(query_to_df(CON_PSQL, Q, db_api='psql'))
    
    # One column per period, joined on the entity field
    mainDf = combine_dfs(counting[0], counting[1:], ENTITY_FIELD)
    
    obj_to_tbl(mainDf, OUT_TABLE)
    
    return OUT_TABLE
def db_to_db(conDBA, conDBB, typeDBA, typeDBB):
    """
    All tables in one Database to other database
    
    typesDB options:
    * sqlite
    * psql
    """
    import os
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.tbl import lst_tbl
    from gasp.sql.mng.db import create_db
    # BUGFIX: df_to_db was called below but never imported
    from gasp.to.sql import df_to_db
    
    # List Tables in DB A
    tbls = lst_tbl(conDBA, excludeViews=True, api=typeDBA)
    
    # Create database B
    if typeDBB == 'psql':
        con_param = {
            "HOST": conDBB["HOST"], "USER": conDBB["USER"],
            "PORT": conDBB["PORT"], "PASSWORD": conDBB["PASSWORD"]
        }
        
        if "TEMPLATE" in conDBB:
            con_param["TEMPLATE"] = conDBB["TEMPLATE"]
        
        NEW_DB = conDBB["DATABASE"]
    else:
        # sqlite: conDBB is a file path
        con_param = os.path.dirname(conDBB)
        NEW_DB = os.path.basename(conDBB)
    
    db_b = create_db(con_param, NEW_DB, overwrite=False, api=typeDBB)
    
    # Table to Database B
    for tbl in tbls:
        df = query_to_df(
            conDBA, "SELECT * FROM {}".format(tbl), db_api=typeDBA)
        
        df_to_db(conDBB, df, tbl, append=None, api=typeDBB)
def check_last_id(lnk, pk, table):
    """
    Check last ID of a given table
    
    return 0 if there is no data
    
    TODO: Do this with Pandas
    """
    from gasp.fm.sql import query_to_df
    
    # NOTE: removed an unused "from gasp.sql.c import psqlcon"
    q = "SELECT MAX({}) AS fid FROM {}".format(pk, table)
    d = query_to_df(lnk, q, db_api='psql').fid.tolist()
    
    # BUGFIX: MAX() over an empty table yields a single NULL row, so the
    # list is [None] (truthy) and the original returned None instead of
    # the documented 0 — check the value as well as the list
    if not d or d[0] is None:
        return 0
    
    return d[0]
def select_api_key():
    """
    Select the API Key to use
    
    Returns (KEY_FID, GOOGLE_API_KEY, API_REQUESTS) for the first key that
    is either unused today or still under today's request quota.
    """
    # NOTE(review): the original used datetime without a visible import —
    # presumably a module-level import; imported locally here to be safe
    import datetime
    from gasp.fm.sql import query_to_df
    
    GOOGLE_KEYS_ = query_to_df(
        APIS_DB, "SELECT fid, key, date, nrqst FROM google",
        db_api='sqlite').to_dict(orient="records")
    
    # BUGFIX: to_dict(orient="records") returns a LIST of row dicts; the
    # original indexed the list itself with string keys (TypeError) and
    # built N copies of the same tuple — index each record instead
    GOOGLE_KEYS = [
        (rec['fid'], rec['key'], rec['date'], rec['nrqst'])
        for rec in GOOGLE_KEYS_
    ]
    
    # Today's date as zero-padded YYYY-MM-DD
    DATE = datetime.date.today()
    DAYTIME = '{}-{}-{}'.format(
        str(DATE.year),
        str(DATE.month) if len(str(DATE.month)) == 2 else
            "0" + str(DATE.month),
        str(DATE.day) if len(str(DATE.day)) == 2 else "0" + str(DATE.day))
    
    # BUGFIX: these were referenced before assignment when every key was
    # exhausted — initialise them explicitly
    KEY_FID, GOOGLE_API_KEY, API_REQUESTS = None, None, None
    
    for k in range(len(GOOGLE_KEYS)):
        if DAYTIME != str(GOOGLE_KEYS[k][2]):
            # Key not used today: fresh quota
            GOOGLE_API_KEY = GOOGLE_KEYS[k][1]
            API_REQUESTS = 0
            KEY_FID = GOOGLE_KEYS[k][0]
        else:
            API_REQUESTS = int(GOOGLE_KEYS[k][3])
            
            # Skip keys already at/over the daily limit
            if API_REQUESTS >= 2490:
                continue
            else:
                GOOGLE_API_KEY = GOOGLE_KEYS[k][1]
                KEY_FID = GOOGLE_KEYS[k][0]
        
        if GOOGLE_API_KEY:
            break
    
    return KEY_FID, GOOGLE_API_KEY, API_REQUESTS
def num_selection(osmcon, polyTbl, folder, cellsize, srscode, rstTemplate,
                  api='SQLITE'):
    """
    Select and Convert to Raster
    
    For every non-null "selection" class in polyTbl, export the matching
    polygons to a shapefile and rasterize them (one worker thread per
    class).
    
    osmcon      : database connection (sqlite path or psql params).
    polyTbl     : polygon table with a "selection" LULC-class column.
    folder      : output folder for intermediate shp/tif files.
    cellsize    : raster cell size.
    srscode     : EPSG code for the outputs.
    rstTemplate : template raster defining the output grid.
    api         : 'SQLITE' or 'POSTGIS' (selects the export backend).
    
    Returns (clsRst, timeGasto): {class_int: raster_path} and a timing
    log {key: (label, timedelta)}.
    """
    import datetime;     import os
    from threading import Thread
    if api == 'SQLITE':
        from gasp.anls.exct import sel_by_attr
    else:
        from gasp.to.shp import psql_to_shp as sel_by_attr
    from gasp.fm.sql import query_to_df
    from gasp.to.rst import shp_to_raster
    
    # Get classes in data
    time_a = datetime.datetime.now().replace(microsecond=0)
    classes = query_to_df(osmcon, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(
        polyTbl
    ), db_api='psql' if api == 'POSTGIS' else 'sqlite').selection.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0 : ('check_cls', time_b - time_a)}
    
    # Select and Export
    # NOTE: clsRst/timeGasto are shared by all worker threads, but each
    # thread writes distinct keys (its class / its cnt offsets)
    clsRst = {}
    SQL_Q = "SELECT {lc} AS cls, geometry FROM {tbl} WHERE selection={lc}"
    
    def FilterAndExport(CLS, cnt):
        # Export one class to shapefile, then rasterize it
        time_x = datetime.datetime.now().replace(microsecond=0)
        
        if api == 'SQLITE':
            shp = sel_by_attr(
                osmcon, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api_gis='ogr'
            )
        else:
            shp = sel_by_attr(
                osmcon, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api='pgsql2shp', geom_col="geometry", tableIsQuery=True
            )
        time_y = datetime.datetime.now().replace(microsecond=0)
        
        rstCls = shp_to_raster(
            shp, None, cellsize, 0,
            os.path.join(folder, 'sel_{}.tif'.format(str(CLS))),
            epsg=srscode, rst_template=rstTemplate, api='gdal'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)
        
        clsRst[int(CLS)] = rstCls
        timeGasto[cnt + 1] = ('toshp_{}'.format(str(CLS)), time_y - time_x)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), time_z - time_y)
    
    # One thread per class; cnt offsets keep timing keys disjoint
    trs = []
    for i in range(len(classes)):
        trs.append(Thread(
            name="lll{}".format(str(classes[i])),
            target=FilterAndExport, args=(classes[i], (i+1) * 10)
        ))
    
    for t in trs: t.start()
    for t in trs: t.join()
    
    return clsRst, timeGasto
def get_not_used_tags(OSM_FILE, OUT_TBL):
    """
    Use a file OSM to detect tags not considered in the
    OSM2LULC procedure
    
    Writes a spreadsheet (OUT_TBL) with the unknown key/value/geom
    triples and, per geometry type, a shapefile with the matching
    features next to OUT_TBL.
    
    NOTE(review): this function uses the Python 2 "unicode" builtin —
    it will not run unmodified under Python 3.
    """
    import os
    from gasp.anls.exct import sel_by_attr
    from gasp.fm.sql import query_to_df
    from gasp.oss import get_filename
    from gasp.osm2lulc.utils import osm_to_sqdb
    from gasp.to import obj_to_tbl
    
    # Reference database holding the key/value pairs OSM2LULC knows about
    OSM_TAG_MAP = {
        "DB": os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'osmtolulc.sqlite'),
        "OSM_FEAT": "osm_features",
        "KEY_COL": "key",
        "VALUE_COL": "value",
        "GEOM_COL": "geom"
    }
    
    WORKSPACE = os.path.dirname(OUT_TBL)
    
    sqdb = osm_to_sqdb(OSM_FILE, os.path.join(
        WORKSPACE, get_filename(OSM_FILE) + '.sqlite'))
    
    # Get Features we are considering
    ourOSMFeatures = query_to_df(OSM_TAG_MAP["DB"], (
        "SELECT {key} AS key_y, {value} AS value_y, {geom} AS geom_y "
        "FROM {tbl}"
    ).format(
        key=OSM_TAG_MAP["KEY_COL"], value=OSM_TAG_MAP["VALUE_COL"],
        geom=OSM_TAG_MAP["GEOM_COL"], tbl=OSM_TAG_MAP["OSM_FEAT"]
    ), db_api='sqlite')
    
    # Get Features in File
    TABLES_TAGS = {
        'points': ['highway', 'man_made', 'building'],
        'lines': ['highway', 'waterway', 'aerialway', 'barrier',
                  'man_made', 'railway'],
        'multipolygons': [
            'aeroway', 'amenity', 'barrier', 'building', 'craft',
            'historic', 'land_area', 'landuse', 'leisure', 'man_made',
            'military', 'natural', 'office', 'place', 'shop', 'sport',
            'tourism', 'waterway', 'power', 'railway', 'healthcare',
            'highway'
        ]
    }
    
    # One UNION ALL query per table collecting every non-null tag value
    Qs = [
        " UNION ALL ".join([(
            "SELECT '{keycol}' AS key, {keycol} AS value, "
            "'{geomtype}' AS geom FROM {tbl} WHERE "
            "{keycol} IS NOT NULL"
        ).format(
            keycol=c,
            geomtype='Point' if table == 'points' else 'Line' \
                if table == 'lines' else 'Polygon',
            tbl=table
        ) for c in TABLES_TAGS[table]]) for table in TABLES_TAGS
    ]
    
    fileOSMFeatures = query_to_df(sqdb, (
        "SELECT key, value, geom FROM ({}) AS foo "
        "GROUP BY key, value, geom"
    ).format(" UNION ALL ".join(Qs)), db_api='sqlite')
    
    _fileOSMFeatures = fileOSMFeatures.merge(
        ourOSMFeatures, how='outer',
        left_on=["key", "value", "geom"],
        right_on=["key_y", "value_y", "geom_y"])
    
    # Select OSM Features of file without correspondence
    _fileOSMFeatures["isnew"] = _fileOSMFeatures.key_y.fillna(value='nenhum')
    newTags = _fileOSMFeatures[_fileOSMFeatures.isnew == 'nenhum']
    
    # Escape single quotes for SQL and build per-row WHERE terms
    newTags["value"] = newTags.value.str.replace("'", "''")
    newTags["whr"] = newTags.key.str.encode('utf-8').astype(str) + "='" + \
        newTags.value.str.encode('utf-8').astype(str) + "'"
    
    # Export OUT_TBL with tags not being used
    obj_to_tbl(newTags, OUT_TBL, sheetsName="new_tags", sanitizeUtf8=True)
    
    # Export tags not being used to new shapefile
    def to_regular_str(row):
        # Fallback sanitizer when the unicode WHERE clause fails
        from gasp import unicode_to_str
        row["whr_san"] = unicode_to_str(row.whr)
        return row
    
    for t in TABLES_TAGS:
        if t == 'points':
            filterDf = newTags[newTags.geom == 'Point']
        elif t == 'lines':
            filterDf = newTags[newTags.geom == 'Line']
        elif t == 'multipolygons':
            filterDf = newTags[newTags.geom == 'Polygon']
        
        # BUGFIX: dropped a stray third argument ('utf-8') that the
        # original passed to .format()
        Q = unicode("SELECT * FROM {} WHERE {}", 'utf-8').format(
            unicode(t, 'utf-8'), filterDf.whr.str.cat(sep=" OR "))
        
        try:
            # BUGFIX: original referenced the misspelled name "WORKPSACE"
            # here, guaranteeing a NameError and forcing the except path
            shp = sel_by_attr(
                sqdb, Q, os.path.join(WORKSPACE, t + '.shp'), api_gis='ogr')
        except Exception:
            __filterDf = filterDf.apply(lambda x: to_regular_str(x), axis=1)
            
            _Q = "SELECT * FROM {} WHERE {}".format(
                t, __filterDf.whr_san.str.cat(sep=" OR "))
            
            shp = sel_by_attr(sqdb, _Q, os.path.join(WORKSPACE, t + '.shp'))
    
    return OUT_TBL
def ID_rows_with_temporal_proximity_by_entities(conParam, table, entity_field,
                                                day_field, hour_field,
                                                hour_decimal_field,
                                                time_tolerance, outXlsPath):
    """
    Retrieve rows from one pgtable with some temporal proximity
    
    Table structure should be
    entity | day | hour | hour_decimal
    
    For a time_tolerance (minutes), the output table flags the
    entity/day/hour groups that contain rows with a temporal difference
    inside/bellow that time tolerance.
    
    entity_field could be more than one field.
    
    This method only identifies if one entity, for one day, has rows very
    close of each others, in terms of time. Not a good strategy for large
    tables — for large tables, SQL based methods are needed.
    """
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.fld import get_columns_type
    from gasp.to import obj_to_tbl
    
    # NOTE: removed an unused "import pandas" from the original
    entity_field = goToList(entity_field)
    COLS = entity_field + [day_field, hour_field]
    COLS_TYPE = get_columns_type(conParam, table)
    
    # TIME TOLERANCE IN HOURS
    TIME_TOLERANCE = time_tolerance / 60.0
    
    def thereIsRowsSameTimeInt(row):
        # Flag the row when two of its group's decimal hours are closer
        # than TIME_TOLERANCE
        whr = []
        for c in COLS:
            if COLS_TYPE[c] == str:
                whr.append("{}='{}'".format(c, row[c]))
            else:
                whr.append("{}={}".format(c, row[c]))
        
        hourRows = query_to_df(conParam,
            "SELECT {} FROM {} WHERE {}".format(
                hour_decimal_field, table, " AND ".join(whr)),
            db_api='psql'
        )[hour_decimal_field].tolist()
        
        # BUGFIX: the original read "dif" after the loops, which is
        # unbound when fewer than two rows match; use a flag instead
        is_close = 0
        for i in range(len(hourRows)):
            for e in range(i + 1, len(hourRows)):
                if abs(hourRows[i][0] - hourRows[e][0]) < TIME_TOLERANCE:
                    is_close = 1
                    break
            if is_close:
                break
        
        row['time_difference'] = is_close
        
        return row
    
    # Count entity occourrences for one day and hour
    countsByEntityTime = query_to_df(conParam, (
        "SELECT {scols}, conta FROM "
        "(SELECT {scols}, COUNT({ent}) AS conta FROM {tbl} "
        "GROUP BY {scols}) AS foo WHERE conta > 1"
    ).format(
        scols=', '.join(COLS), ent=entity_field[0], tbl=table
    ), db_api='psql')
    
    # For each row in the last count, When count is > 1
    # Check time difference between rows for one day and hour
    countsByEntityTime = countsByEntityTime.apply(
        lambda x: thereIsRowsSameTimeInt(x), axis=1)
    
    obj_to_tbl(countsByEntityTime, outXlsPath)
    
    return outXlsPath
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, conPARAM, DB,
                     SRS_CODE, GIS_SOFTWARE="GRASS",
                     GRASS_REGION_TEMPLATE=None):
    """
    Check differences between pairs of Feature Classes.

    Given several Feature Classes (FC), each holding an attribute of
    interest, consider every possible pair of these FC and compare the
    differences in the distribution of that attribute's values within
    each pair.  For every pair a union layer is built, the agreement
    area/percentage is computed and a confusion matrix is produced; all
    results are exported to a single spreadsheet (REPORT).

    SHAPES_TO_COMPARE - dict {path_to_fc_or_raster: attribute_column}
    OUT_FOLDER        - output folder (must NOT exist; it is created here)
    REPORT            - path of the output XLS report
    conPARAM          - psql connection parameters (dict; "DATABASE" is set here)
    DB                - name of the PostgreSQL database to be created
    SRS_CODE          - EPSG code of the data
    GIS_SOFTWARE      - "GRASS" or "ARCGIS"
    GRASS_REGION_TEMPLATE - raster/region template, mandatory for GRASS

    * Dependencies:
    - ArcGIS;
    - GRASS;
    - PostgreSQL;
    - PostGIS.

    * GIS_SOFTWARE Options:
    - ARCGIS;
    - GRASS.

    NOTE(review): this function is written for Python 2 (print statements,
    list-returning dict.keys()).  The names `union`, `optimized_union_anls`
    and `db_to_xls` are not imported inside this function — presumably they
    are module-level imports outside this chunk; verify.
    """

    import datetime
    import os
    import pandas
    from gasp.fm.sql import query_to_df
    from gasp.sql.mng.tbl import tbls_to_tbl
    from gasp.sql.mng.geom import fix_geom, check_geomtype_in_table
    from gasp.sql.mng.geom import select_main_geom_type
    from gasp.sql.mng.qw import ntbl_by_query
    from gasp.prop.ff import check_isRaster
    from gasp.oss import get_filename
    from gasp.sql.mng.db import create_db
    from gasp.to.sql import shp_to_psql, df_to_db
    from gasp.to.shp import rst_to_polyg
    from gasp.to.shp import shp_to_shp, psql_to_shp
    from gasp.to import db_to_tbl

    # Check if folder exists; if not create it (refuse to overwrite
    # an existing folder — results would be mixed with old files)
    if not os.path.exists(OUT_FOLDER):
        from gasp.oss.ops import create_folder
        create_folder(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    # Start GRASS GIS Session if GIS_SOFTWARE == GRASS
    if GIS_SOFTWARE == "GRASS":
        if not GRASS_REGION_TEMPLATE:
            raise ValueError(
                'To use GRASS GIS you need to specify GRASS_REGION_TEMPLATE')

        from gasp.session import run_grass

        gbase = run_grass(OUT_FOLDER, grassBIN='grass76', location='shpdif',
                          srs=GRASS_REGION_TEMPLATE)

        import grass.script as grass
        import grass.script.setup as gsetup

        gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

        # GRASS-specific helpers are only importable after session init
        from gasp.mng.grstbl import rename_col
        from gasp.to.shp.grs import shp_to_grs, grs_to_shp
        from gasp.to.rst import rst_to_grs
        from gasp.mng.fld import rename_column

    # Convert to SHAPE if file is Raster
    # Import to GRASS GIS if GIS SOFTWARE == GRASS
    # Each input ends up in _SHP_TO_COMPARE mapped to the column holding
    # the value to be compared ("lulc_<i>" / "gridcode")
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            if GIS_SOFTWARE == "ARCGIS":
                d = rst_to_polyg(s, os.path.join(
                    os.path.dirname(s), get_filename(s) + '.shp'),
                    gisApi='arcpy')

                _SHP_TO_COMPARE[d] = "gridcode"

            elif GIS_SOFTWARE == "GRASS":
                # To GRASS
                rstName = get_filename(s)
                inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
                # To Raster
                d = rst_to_polyg(inRst, rstName,
                                 rstColumn="lulc_{}".format(i),
                                 gisApi="grasscmd")

                # Export Shapefile
                shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'),
                                 "area")

                _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            if GIS_SOFTWARE == "ARCGIS":
                _SHP_TO_COMPARE[s] = SHAPES_TO_COMPARE[s]

            elif GIS_SOFTWARE == "GRASS":
                # To GRASS
                grsV = shp_to_grs(s, get_filename(s), asCMD=True)

                # Change name of column with comparing value
                rename_col(grsV, SHAPES_TO_COMPARE[s], "lulc_{}".format(i),
                           as_cmd=True)

                # Export
                shp = grs_to_shp(grsV, os.path.join(
                    OUT_FOLDER, grsV + '_rn.shp'), "area")

                _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE

    if GIS_SOFTWARE == "ARCGIS":
        from gasp.cpu.arcg.mng.fld import calc_fld
        from gasp.cpu.arcg.mng.wspace import create_geodb
        from gasp.mng.gen import copy_feat

        # Sanitize data and Add new field
        __SHAPES_TO_COMPARE = {}
        i = 0

        # Create GeoDatabase
        geodb = create_geodb(OUT_FOLDER, 'geo_sanitize')

        """ Sanitize Data """
        for k in SHAPES_TO_COMPARE:
            # Send data to GeoDatabase only to sanitize
            newFc = shp_to_shp(k, os.path.join(geodb, get_filename(k)),
                               gisApi='arcpy')

            # Create a copy to change
            newShp = copy_feat(
                newFc, os.path.join(OUT_FOLDER, os.path.basename(k)),
                gisApi='arcpy')

            # Sanitize field name with interest data
            NEW_FLD = "lulc_{}".format(i)
            calc_fld(newShp, NEW_FLD, "[{}]".format(SHAPES_TO_COMPARE[k]),
                     isNewField={
                         "TYPE": "INTEGER", "LENGTH": 5, "PRECISION": ""})

            __SHAPES_TO_COMPARE[newShp] = NEW_FLD

            i += 1

    else:
        __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database (and record its name in the connection parameters)
    conPARAM["DATABASE"] = create_db(conPARAM, DB)

    """ Union SHAPEs """

    UNION_SHAPE = {}
    FIX_GEOM = {}

    def fix_geometry(shp):
        # Repair invalid geometries by a PostGIS round-trip; used when the
        # ArcGIS union fails on the raw shapefile
        # Send data to PostgreSQL
        nt = shp_to_psql(conPARAM, shp, SRS_CODE, api='shp2pgsql')

        # Fix data
        corr_tbl = fix_geom(conPARAM, nt, "geom", "corr_{}".format(nt),
                            colsSelect=['gid', __SHAPES_TO_COMPARE[shp]])

        # Check if we have multiple geometries
        geomN = check_geomtype_in_table(conPARAM, corr_tbl)

        if geomN > 1:
            corr_tbl = select_main_geom_type(conPARAM, corr_tbl,
                                             "corr2_{}".format(nt))

        # Export data again
        newShp = psql_to_shp(conPARAM, corr_tbl,
                             os.path.join(OUT_FOLDER, corr_tbl + '.shp'),
                             api='pgsql2shp', geom_col='geom')

        return newShp

    # NOTE(review): Python 2 — keys() returns a list, which is indexed below
    SHPS = __SHAPES_TO_COMPARE.keys()
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            if GIS_SOFTWARE == 'ARCGIS':
                # Try the union thing
                unShp = union(SHPS[i], SHPS[e], os.path.join(
                    OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                    api_gis="arcpy")

                # See if the union went all right
                if not os.path.exists(unShp):
                    # Union went not well
                    # See if geometry was already fixed
                    if SHPS[i] not in FIX_GEOM:
                        # Fix SHPS[i] geometry
                        FIX_GEOM[SHPS[i]] = fix_geometry(SHPS[i])

                    if SHPS[e] not in FIX_GEOM:
                        FIX_GEOM[SHPS[e]] = fix_geometry(SHPS[e])

                    # Run Union again
                    unShp = union(FIX_GEOM[SHPS[i]], FIX_GEOM[SHPS[e]],
                                  os.path.join(
                                      OUT_FOLDER,
                                      "un_{}_{}_f.shp".format(i, e)),
                                  api_gis="arcpy")

            elif GIS_SOFTWARE == "GRASS":
                # Optimized Union
                print "Union between {} and {}".format(SHPS[i], SHPS[e])
                time_a = datetime.datetime.now().replace(microsecond=0)
                __unShp = optimized_union_anls(
                    SHPS[i], SHPS[e],
                    os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                    GRASS_REGION_TEMPLATE, SRS_CODE,
                    os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                    multiProcess=True)
                time_b = datetime.datetime.now().replace(microsecond=0)
                print time_b - time_a

                # Rename cols (drop the a_/b_ prefixes the union added)
                unShp = rename_column(
                    __unShp, {
                        "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                            __SHAPES_TO_COMPARE[SHPS[i]],
                        "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                            __SHAPES_TO_COMPARE[SHPS[e]]
                    },
                    os.path.join(OUT_FOLDER, "un_{}_{}_rn.shp".format(i, e)))

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data one more time to postgresql
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(conPARAM, UNION_SHAPE[uShp], SRS_CODE,
                                api='shp2pgsql')

        # Produce table with % of area equal in both maps
        areaMapTbl = ntbl_by_query(
            conPARAM, "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
             "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
             "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
             "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
             "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
             "CASE "
             "WHEN {map1_cls} = {map2_cls} "
             "THEN 1 ELSE 0 "
             "END AS isthesame, total_area FROM {tbl}, ("
             "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
             ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                lulc_1=get_filename(uShp[0]),
                lulc_2=get_filename(uShp[1]),
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                tbl=union_tbl), api='psql')

        # Produce confusion matrix for the pair in comparison
        # First: the distinct classes present in either map (crosstab
        # needs the output columns to be declared up-front)
        lulcCls = query_to_df(
            conPARAM,
            ("SELECT fcol FROM ("
             "SELECT CAST({map1_cls} AS text) AS fcol FROM {tbl} "
             "GROUP BY {map1_cls} "
             "UNION ALL SELECT CAST({map2_cls} AS text) FROM {tbl} "
             "GROUP BY {map2_cls}"
             ") AS foo GROUP BY fcol ORDER BY fcol").format(
                tbl=union_tbl,
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]]),
            db_api='psql').fcol.tolist()

        matrixTbl = ntbl_by_query(
            conPARAM, "{}_matrix".format(union_tbl),
            ("SELECT * FROM crosstab('"
             "SELECT CASE "
             "WHEN foo.{map1_cls} IS NOT NULL "
             "THEN foo.{map1_cls} ELSE jtbl.flyr "
             "END AS lulc1_cls, CASE "
             "WHEN foo.{map2_cls} IS NOT NULL "
             "THEN foo.{map2_cls} ELSE jtbl.slyr "
             "END AS lulc2_cls, CASE "
             "WHEN foo.garea IS NOT NULL "
             "THEN round(CAST(foo.garea / 1000000 AS numeric)"
             ", 3) ELSE 0 "
             "END AS garea FROM ("
             "SELECT CAST({map1_cls} AS text) AS {map1_cls}, "
             "CAST({map2_cls} AS text) AS {map2_cls}, "
             "SUM(ST_Area(geom)) AS garea "
             "FROM {tbl} GROUP BY {map1_cls}, {map2_cls}"
             ") AS foo FULL JOIN ("
             "SELECT f.flyr, s.slyr FROM ("
             "SELECT CAST({map1_cls} AS text) AS flyr "
             "FROM {tbl} GROUP BY {map1_cls}"
             ") AS f, ("
             "SELECT CAST({map2_cls} AS text) AS slyr "
             "FROM {tbl} GROUP BY {map2_cls}"
             ") AS s"
             ") AS jtbl "
             "ON foo.{map1_cls} = jtbl.flyr AND "
             "foo.{map2_cls} = jtbl.slyr "
             "ORDER BY 1,2"
             "') AS ct("
             "lulc_cls text, {crossCols}"
             ")").format(crossCols=", ".join(
                ["cls_{} numeric".format(c) for c in lulcCls]),
                tbl=union_tbl,
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]]), api='psql')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(conPARAM,
                              [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL],
                              'total_table')

    # Create table with % of agreement between each pair of maps
    mapsNames = query_to_df(
        conPARAM,
        ("SELECT lulc FROM ("
         "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
         ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    Q = ("SELECT * FROM crosstab('"
         "SELECT CASE "
         "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
         "END AS lulc_1, CASE "
         "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
         "END AS lulc_2, CASE "
         "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
         "END AS agree_percentage FROM ("
         "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
         "SELECT lulc_1, lulc_2, {valCol} FROM ("
         "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
         "FROM {tbl}"
         ") AS tst"
         ") AS foo FULL JOIN ("
         "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
         "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_1, ("
         "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
         ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
         "ORDER BY lulc_1, lulc_2"
         "') AS ct("
         "lulc_map text, {crossCols}"
         ")")

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    # First pivot field fills TOTAL_AGREE_TABLE, the second fills
    # TOTAL_AREA_TABLE (relies on the first iteration setting the former)
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = ntbl_by_query(
                conPARAM, "agreement_table",
                Q.format(tbl=total_table, valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_)
                             for map_ in mapsNames])),
                api='psql')

        else:
            TOTAL_AREA_TABLE = ntbl_by_query(
                conPARAM, "area_table",
                Q.format(tbl=total_table, valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_)
                             for map_ in mapsNames])),
                api='psql')

    # Union Mapping (which pair of inputs produced which union shape)
    UNION_MAPPING = pandas.DataFrame(
        [[
            get_filename(k[0]), get_filename(k[1]),
            get_filename(UNION_SHAPE[k])
        ] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'
                 ]) if GIS_SOFTWARE == "ARCGIS" else pandas.DataFrame(
        [[k[0], k[1], get_filename(UNION_SHAPE[k])] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(conPARAM, UNION_MAPPING, 'union_map',
                             api='psql')

    # Export Results
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    # Sheet names are truncated to 15 chars per input to respect XLS
    # sheet-name length limits
    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(get_filename(x[0])[:15], get_filename(x[1])[:15])
        for x in SYNTH_TBL
    ]

    db_to_xls(conPARAM, ["SELECT * FROM {}".format(x) for x in TABLES],
              REPORT, sheetsNames=SHEETS, dbAPI='psql')

    return REPORT
def rst_pnt_to_build(osmLink, pntTable, polyTable, api_db='SQLITE'):
    """
    Replace buildings with tag yes using the info in the Points Layer.

    Only used for URBAN ATLAS and CORINE LAND COVER.

    osmLink   - connection to the OSM database
    pntTable  - points table with building classes
    polyTable - polygons table with buildings
    api_db    - 'POSTGIS' or 'SQLITE'

    Returns (resLayers, timeGasto): a dict mapping each LULC class to the
    raster layers produced for it, and a dict with the elapsed time of
    every step.
    """

    import datetime
    from gasp.sql.mng.tbl import row_num as cnt_row
    from gasp.fm.sql import query_to_df
    if api_db == 'POSTGIS':
        from gasp.to.shp.grs import psql_to_grs as db_to_shp
    else:
        from gasp.to.shp.grs import sqlite_to_shp as db_to_shp
    from gasp.sql.anls.ovlay import sgbd_get_feat_within
    from gasp.sql.anls.ovlay import sgbd_get_feat_not_within
    from gasp.to.rst import shp_to_raster

    def _now():
        # Timestamp with microseconds stripped, for step timing
        return datetime.datetime.now().replace(microsecond=0)

    # Backend selectors derived once from api_db
    ovlAPI = "OGR_SPATIALITE" if api_db != "POSTGIS" else api_db
    sqlAPI = 'psql' if api_db == 'POSTGIS' else 'sqlite'

    # Sub-queries shared by both overlay operations
    pntSel = ("(SELECT buildings AS pnt_build, geometry AS pnt_geom "
              "FROM {} WHERE buildings IS NOT NULL)").format(pntTable)
    polySel = ("(SELECT buildings AS poly_build, geometry AS poly_geom "
               "FROM {} WHERE buildings IS NOT NULL)").format(polyTable)

    tStart = _now()

    # Buildings that contain a classified point get that point's class
    new_build = sgbd_get_feat_within(
        osmLink, pntSel, "pnt_geom", polySel, "poly_geom",
        "new_buildings", inTblCols="pnt_build AS cls",
        withinCols="poly_geom AS geometry", outTblIsFile=None,
        apiToUse=ovlAPI)

    tWithin = _now()

    # Buildings with no classified point keep the generic class 11
    yes_build = sgbd_get_feat_not_within(
        osmLink, polySel, "poly_geom", pntSel, "pnt_geom",
        "yes_builds", inTblCols="poly_geom AS geometry, 11 AS cls",
        outTblIsFile=None, apiToUse=ovlAPI)

    tNotWithin = _now()

    resLayers = {}

    N11 = cnt_row(osmLink, yes_build, api=sqlAPI)

    tCount = _now()

    if N11:
        # Class-11 buildings: bring into GRASS GIS and rasterize
        grsBuild11 = db_to_shp(osmLink, yes_build, "yes_builds",
                               notTable=True, filterByReg=True)

        tImp11 = _now()

        rstBuild11 = shp_to_raster(grsBuild11, 11, None, None,
                                   "rst_builds11", api="grass")

        tRst11 = _now()

        resLayers[11] = [rstBuild11]
    else:
        tImp11, tRst11 = None, None

    # Distinct classes assigned via the points layer
    lulcCls = query_to_df(
        osmLink, "SELECT cls FROM {} GROUP BY cls".format(new_build),
        db_api=sqlAPI).cls.tolist()

    timeGasto = {
        0: ('intersect', tWithin - tStart),
        1: ('disjoint', tNotWithin - tWithin),
        2: ('count_b11', tCount - tNotWithin),
        3: None if not tImp11 else ('import_b11', tImp11 - tCount),
        4: None if not tRst11 else ('torst_b11', tRst11 - tImp11),
    }

    tk = 5
    for cls in lulcCls:
        tX = _now()

        # Import one class at a time and rasterize it
        shp = db_to_shp(
            osmLink, new_build, "nbuild_{}".format(str(cls)),
            "cls = {}".format(cls), notTable=True, filterByReg=True)

        tY = _now()

        rstb = shp_to_raster(shp, int(cls), None, None,
                             "rst_nbuild_{}".format(str(cls)), api="grass")

        tZ = _now()

        # Class 11 may already hold the "yes" buildings raster
        if int(cls) == 11 and int(cls) in resLayers:
            resLayers[int(cls)].append(rstb)
        else:
            resLayers[int(cls)] = [rstb]

        timeGasto[tk] = ('import_bn{}'.format(cls), tY - tX)
        timeGasto[tk + 1] = ('torst_bn{}'.format(cls), tZ - tY)

        tk += 2

    return resLayers, timeGasto
def db_to_tbl(conDB, tables, outTbl, txtDelimiter=None, dbAPI='psql',
              outTblF=None, sheetsNames=None):
    """
    Database data to File table.

    API's available:
    * psql;
    * sqlite;

    conDB        - database connection parameters
    tables       - table name, SELECT query, list of either, or 'ALL'
    outTbl       - output file path, or output folder when exporting
                   several tables
    txtDelimiter - delimiter for text-based outputs
    dbAPI        - 'psql' or 'sqlite'
    outTblF      - output file format (e.g. 'xlsx' or '.xlsx'); required
                   when outTbl is a folder
    sheetsNames  - sheet name(s) for spreadsheet outputs

    Returns outTbl.
    """

    import os
    from gasp import goToList
    from gasp.fm.sql import query_to_df
    # FIX: obj_to_tbl is called below but was never imported in this
    # function, raising NameError at export time
    from gasp.to import obj_to_tbl

    if tables == 'ALL':
        from gasp.sql.mng.tbl import lst_tbl
        tables = lst_tbl(conDB, schema='public', excludeViews=True,
                         api=dbAPI)
    else:
        tables = goToList(tables)

    sheetsNames = goToList(sheetsNames)

    # Normalize the format so it always carries a leading dot
    outTblF = None if not outTblF else outTblF \
        if outTblF[0] == '.' else '.' + outTblF

    if len(tables) > 1:
        if not os.path.isdir(outTbl) or not outTblF:
            raise ValueError((
                "When tables has more than one table, "
                "outTbl must be dir and outTblF must be specified"
            ))

    elif len(tables) == 1:
        if os.path.isdir(outTbl) and outTblF:
            outTbl = os.path.join(outTbl, tables[0] + outTblF)

        # FIX: original condition was "os.path.isdir(outTbl) and not outTbl",
        # which can never be True for a non-empty path string, so this
        # error branch was unreachable; the intent is clearly to require
        # outTblF when outTbl is a directory
        elif os.path.isdir(outTbl) and not outTblF:
            raise ValueError((
                'We find only a table to export and outTbl is a dir. '
                'Please give a path to a file or specify the table format '
                'using outTblF format'
            ))

        else:
            outTbl = outTbl

    else:
        raise ValueError(
            "tables value is not valid"
        )

    # Entries that are already SELECT statements are run as-is; plain
    # table names are wrapped in a SELECT *
    DFS = [query_to_df(
        conDB, t if t.startswith(
            "SELECT") else "SELECT * FROM {}".format(t), db_api=dbAPI
    ) for t in tables]

    # FIX: original used os.path.isfile(outTbl), which is False when the
    # output file does not exist yet, so the spreadsheet branch was only
    # taken when overwriting an existing file; checking "not a directory"
    # matches the intent (outTbl is a file path at this point for a
    # single-table export)
    if not os.path.isdir(outTbl):
        from gasp.oss import get_fileformat

        ff = get_fileformat(outTbl)

        if ff == '.xlsx' or ff == '.xls':
            obj_to_tbl(DFS, outTbl, sheetsName=sheetsNames,
                       sanitizeUtf8=None)

            return outTbl

    for i in range(len(DFS)):
        obj_to_tbl(
            DFS[i],
            outTbl if len(DFS) == 1 else os.path.join(
                outTbl, tables[i] + outTblF),
            delimiter=txtDelimiter,
            sheetsName=sheetsNames
        )

    return outTbl