def db_to_db(db_a, db_b, typeDBA, typeDBB):
    """
    Copy every table of one database into another database.

    Useful when we want to migrate a SQLITE DB to a PostgreSQL DB.

    typeDBA / typeDBB options:
    * sqlite
    * psql
    """

    from glass.g.wt.sql    import df_to_db
    from glass.ng.sql.q    import q_to_obj
    from glass.ng.prop.sql import lst_tbl
    from glass.ng.sql.db   import create_db

    # Tables available in the source database (views excluded)
    source_tbls = lst_tbl(db_a, excludeViews=True, api=typeDBA)

    # Make sure the target database exists
    db_b = create_db(db_b, overwrite=False, api=typeDBB)

    # Copy each table: read into a DataFrame, then write to the target
    for t in source_tbls:
        tbl_df = q_to_obj(
            db_a, "SELECT * FROM {}".format(t), db_api=typeDBA
        )

        df_to_db(db_b, tbl_df, t, append=None, api=typeDBB)
def tbl_fromdb_todb(from_db, to_db, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL Tables from one database to other.

    from_db / to_db - source and target databases.
    tables - table name or list of table names to copy.
    qForTbl - optional {table: query} mapping; when a table has an entry,
    that query is executed instead of "SELECT * FROM table".
    api - 'pandas' (row transfer through DataFrames) or 'psql'
    (pg_dump / restore through a temporary SQL script); any other
    value falls back to 'pandas'.
    """

    from glass.pys      import obj_to_lst
    from glass.g.wt.sql import df_to_db

    # Any unknown API value falls back to 'pandas'
    api = 'pandas' if api != 'pandas' and api != 'psql' else api

    tables = obj_to_lst(tables)

    if api == 'pandas':
        from glass.ng.sql.q import q_to_obj

        for table in tables:
            # Use the caller-provided query when one exists for this table;
            # otherwise dump the whole table.  (The original code spelled
            # the identical "SELECT *" branch twice.)
            if qForTbl and table in qForTbl:
                tblDf = q_to_obj(from_db, qForTbl[table], db_api='psql')
            else:
                tblDf = q_to_obj(from_db, "SELECT * FROM {}".format(
                    table), db_api='psql')

            df_to_db(to_db, tblDf, table, api='psql')

    else:
        import os
        from glass.pys.oss   import mkdir, del_folder
        from glass.ng.sql.bkup import dump_tbls
        from glass.ng.sql.db import restore_tbls

        # Temporary folder (random name) next to this module for the dump
        tmpFolder = mkdir(
            os.path.dirname(os.path.abspath(__file__)), randName=True
        )

        # Dump
        sqlScript = dump_tbls(from_db, tables, os.path.join(
            tmpFolder, "tables_data.sql"
        ))

        # Restore
        restore_tbls(to_db, sqlScript, tables)

        del_folder(tmpFolder)
def apndtbl_in_otherdb(db_a, db_b, tblA, tblB, mapCols,
                       geomCol=None, srsEpsg=None, con_a=None, con_b=None):
    """
    Append data of one table to another table in other database.
    """

    from glass.ng.sql.q import q_to_obj
    from glass.g.wt.sql import df_to_db

    # Read only the columns we are mapping from the source table
    sel_cols = ", ".join(list(mapCols.keys()))

    df = q_to_obj(db_a, "SELECT {} FROM {}".format(
        sel_cols, tblA
    ), db_api='psql', geomCol=geomCol, epsg=srsEpsg, dbset=con_a)

    # Rename columns to the names used in the target table
    df.rename(columns=mapCols, inplace=True)

    # The geometry column may have been renamed by the mapping as well
    if geomCol:
        geomCol = mapCols.get(geomCol, geomCol)

    if geomCol and srsEpsg:
        # Spatial write: the geometry type must be known
        from glass.g.prop.feat import get_gtype

        geom_type = get_gtype(df, geomCol=geomCol, gisApi='pandas')

        df_to_db(
            db_b, df, tblB, append=True, api='psql', epsg=srsEpsg,
            geomType=geom_type, colGeom=geomCol, dbset=con_b
        )
    else:
        # Plain (non-spatial) append
        df_to_db(db_b, df, tblB, append=True, api='psql', dbset=con_b)

    return tblB
def name_circulations(db, GTFS_SCHEMA, OTHER_SCHEMA, output,
                      other_db=None, serviceSchema=None, routeIdColName=None,
                      tripIdColName=None):
    """
    Get all circulations from GTFS and associate these circulations
    to other meta columns of other database

    GTFS_SCHEMA = {
        "TNAME"     : "stop_times",
        "TRIP"      : "trip_id",
        "STOP"      : "stop_id",
        "SEQUENCE"  : "stop_sequence",
        "DEPARTURE" : "departure_time"
    }

    OTHER_SCHEMA = {
        "TNAME"    : "percursos_geom_v2",
        "ROUTE"    : ["carreira", "variante", "sentido"],
        "SEQUENCE" : "ordem",
        "STOP"     : "paragem"
    }

    serviceSchema = {
        "TRIPS" : {
            "TNAME"   : "trips",
            "TRIP"    : "trip_id",
            "SERVICE" : "service_id"
        },
        "CALENDAR" : {
            "TNAME"   : "calendar_dates",
            "SERVICE" : "service_id",
            "DATE"    : "date"
        },
        "FILTER_DAY" : 20180308
    }

    db         - database holding the GTFS tables.
    other_db   - database holding the metadata table; defaults to db.
    output     - file path (written as a table file) OR a table name
                 (written into db via df_to_db) - decided by whether the
                 value has a directory component.
    Returns output.
    """

    import os
    from glass.pys      import obj_to_lst
    from glass.ng.sql.q import q_to_obj

    # Default to the GTFS database when no separate metadata DB is given
    other_db = db if not other_db else other_db

    # Sanitize Route ID in Other Schema: when the route identifier is a
    # composite of several columns, concatenate them (pipe-separated) into
    # a single new column so circulations can be joined on one key.
    OTHER_SCHEMA_ROUTE = obj_to_lst(OTHER_SCHEMA["ROUTE"])

    if len(OTHER_SCHEMA_ROUTE) > 1:
        from glass.ng.sql.col import txt_cols_to_col

        ROUTE_COL = routeIdColName if routeIdColName else "fid_route"

        txt_cols_to_col(
            other_db, OTHER_SCHEMA["TNAME"], OTHER_SCHEMA_ROUTE,
            "|", ROUTE_COL
        )
    else:
        ROUTE_COL = routeIdColName if routeIdColName else \
            OTHER_SCHEMA_ROUTE[0]

    """
    Get all circulations in GTFS and their start time
    """

    if serviceSchema:
        serviceSchema["FILTER_DAY"] = obj_to_lst(serviceSchema["FILTER_DAY"])

    # Optional WHERE clause restricting the calendar to the filter day(s)
    where = "" if not serviceSchema else (" WHERE {} ").format(" OR ".join([
        "{}.{} = {}".format(
            serviceSchema["CALENDAR"]["TNAME"],
            serviceSchema["CALENDAR"]["DATE"], d
        ) for d in serviceSchema["FILTER_DAY"]
    ]))

    # Optional INNER JOIN keeping only trips whose service runs on the
    # filtered day(s)
    injoinQ = "" if not serviceSchema else (
        "INNER JOIN ("
            "SELECT {tripsTbl}.{tripsTripId} "
            "FROM {tripsTbl} INNER JOIN {calenTbl} ON "
            "{tripsTbl}.{tripsServId} = {calenTbl}.{calenServId}{whr} "
            "GROUP BY {tripsTbl}.{tripsTripId}"
        ") AS trip_service ON {stopTimeTbl}.{stopTimeTrip} "
        "= trip_service.{tripsTripId} "
    ).format(
        tripsTbl=serviceSchema["TRIPS"]["TNAME"],
        tripsTripId=serviceSchema["TRIPS"]["TRIP"],
        tripsServId=serviceSchema["TRIPS"]["SERVICE"],
        calenTbl=serviceSchema["CALENDAR"]["TNAME"],
        calenServId=serviceSchema["CALENDAR"]["SERVICE"],
        stopTimeTbl=GTFS_SCHEMA["TNAME"],
        stopTimeTrip=GTFS_SCHEMA["TRIP"],
        whr=where
    )

    newTripCol = tripIdColName if tripIdColName else GTFS_SCHEMA["TRIP"]

    # One row per trip: the ordered array of stops, their sequence numbers,
    # and the first/last departure times
    Q = (
        "SELECT {stopTimesT}.{tripId} AS {newTrip}, "
        "array_agg({stopTimesT}.{stopId} "
            "ORDER BY {stopTimesT}.{tripId}, {stopTimesT}.{stopSq}) AS stops, "
        "array_agg({stopTimesT}.{stopSq} "
            "ORDER BY {stopTimesT}.{tripId}, {stopTimesT}.{stopSq}) AS stops_order, "
        "MIN({stopTimesT}.{depTime}) AS departure, "
        "MAX({stopTimesT}.{depTime}) AS depar_last_stop "
        "FROM {stopTimesT} {injoin}"
        "GROUP BY {stopTimesT}.{tripId}"
    ).format(
        tripId=GTFS_SCHEMA["TRIP"], stopId=GTFS_SCHEMA["STOP"],
        stopSq=GTFS_SCHEMA["SEQUENCE"], depTime=GTFS_SCHEMA["DEPARTURE"],
        stopTimesT=GTFS_SCHEMA["TNAME"],
        injoin=injoinQ, newTrip=newTripCol
    )

    circ = q_to_obj(db, Q)

    """
    Get all routes metadata in the "Other Database/Table"
    """
    # One row per route: the ordered array of stops and sequence numbers
    Q = (
        "SELECT {idRoute}, "
        "array_agg({stopF} ORDER BY {idRoute}, {stopSq}) AS stops, "
        "array_agg({stopSq} ORDER BY {idRoute}, {stopSq}) AS stops_order "
        "FROM {t} GROUP BY {idRoute}"
    ).format(
        idRoute=ROUTE_COL, stopF=OTHER_SCHEMA["STOP"],
        stopSq=OTHER_SCHEMA["SEQUENCE"], t=OTHER_SCHEMA["TNAME"]
    )

    routes = q_to_obj(other_db, Q)

    def sanitizeDf(df, col):
        # Normalize the array column into a bare comma-separated string so
        # the two sides can be merged on exact string equality.
        # NOTE(review): on recent pandas versions Series.str.replace
        # defaults changed regarding regex; '[' with regex=True is an
        # invalid pattern - confirm the pandas version pinned by this
        # project treats these as literal replacements.
        df[col] = df[col].astype(str)
        df[col] = df[col].str.replace('L', '')
        df[col] = df[col].str.replace(' ', '')
        df[col] = df[col].str.replace('[', '')
        df[col] = df[col].str.replace(']', '')

        return df

    circ   = sanitizeDf(circ, "stops")
    routes = sanitizeDf(routes, "stops")

    # Associate each circulation with the route having the exact same
    # ordered stop list
    newDf = circ.merge(
        routes, how='inner', left_on="stops", right_on="stops")

    if os.path.dirname(output):
        # Write XLS (output looks like a file path)
        from glass.ng.wt import obj_to_tbl

        obj_to_tbl(newDf, output)
    else:
        # Send to pgsql (output is a table name)
        from glass.g.wt.sql import df_to_db

        df_to_db(db, newDf, output, api='psql')

    return output
def tbl_to_db(tblFile, db, sqlTbl, delimiter=None, encoding_='utf-8',
              sheet=None, isAppend=None, api_db='psql', colsMap=None):
    """
    Table file to Database Table.

    tblFile   - path to a table file (.csv/.txt/.tsv/.dbf/.xls/.xlsx/.ods)
                or to a folder (every file inside is imported).
    db        - target database.
    sqlTbl    - output table name or list of names; files without a
                matching name fall back to their own filename.
    delimiter - required for the text formats.
    sheet     - sheet to read for spreadsheet formats (mandatory for .ods).
    isAppend  - when truthy, append to an existing table.
    api_db    - API's available: 'psql' or 'sqlite'.
    colsMap   - optional {source_col: new_col} rename mapping applied
                before the import.

    Returns the imported table name, or a list of names when several
    files were imported.

    Raises ValueError for missing delimiter/sheet or unknown file format.
    """

    import os
    from glass.pys      import obj_to_lst
    from glass.pys.oss  import fprop
    from glass.ng.rd    import tbl_to_obj
    from glass.g.wt.sql import df_to_db

    if os.path.isdir(tblFile):
        from glass.pys.oss import lst_ff

        tbls = lst_ff(tblFile)
    else:
        tbls = obj_to_lst(tblFile)

    outSQLTbl = obj_to_lst(sqlTbl)

    RTBL = []
    for i in range(len(tbls)):
        fp = fprop(tbls[i], ['fn', 'ff'])
        ff = fp['fileformat']
        fn = fp['filename']

        if ff == '.csv' or ff == '.txt' or ff == '.tsv':
            if not delimiter:
                raise ValueError((
                    "To convert TXT to DB table, you need to give a value for the "
                    "delimiter input parameter"
                ))

            __enc = 'utf-8' if not encoding_ else encoding_

            data = tbl_to_obj(
                tbls[i], _delimiter=delimiter, encoding_=__enc
            )

        elif ff == '.dbf':
            data = tbl_to_obj(tbls[i])

        elif ff == '.xls' or ff == '.xlsx':
            data = tbl_to_obj(tbls[i], sheet=sheet)

        elif ff == '.ods':
            if not sheet:
                raise ValueError((
                    "To convert ODS to DB table, you need to give a value "
                    "for the sheet input parameter"
                ))

            data = tbl_to_obj(tbls[i], sheet=sheet)

        else:
            raise ValueError('{} is not a valid table format!'.format(ff))

        if colsMap:
            data.rename(columns=colsMap, inplace=True)

        # Send data to database.
        # Use the i-th user-given name when it exists; otherwise fall back
        # to the filename.  (FIX: the original guard compared i against
        # len(tbls) - always true inside this loop - so it raised
        # IndexError whenever fewer output names than files were given.)
        out_tbl = outSQLTbl[i] if outSQLTbl and i < len(outSQLTbl) else fn

        _rtbl = df_to_db(
            db, data, out_tbl,
            append=isAppend, api=api_db
        )

        RTBL.append(_rtbl)

    return RTBL[0] if len(RTBL) == 1 else RTBL
def dsn_data_collection_by_multibuffer(inBuffers, workspace, db, datasource,
                                       keywords=None):
    """
    Extract Digital Social Network Data for each sub-buffer in buffer.

    A sub-buffer is a buffer with a radius equals to the main buffer
    radius /2 and with a central point at North, South, East, West,
    Northeast, Northwest, Southwest and Southeast of the main buffer
    central point.

    inBuffers = {
        "lisbon" : {
            'x'      : -89004.994779, # in meters
            'y'      : -102815.866054, # in meters
            'radius' : 10000,
            'epsg'   : 3763
        },
        "london : {
            'x'      : -14210.551441, # in meters
            'y'      : 6711542.47559, # in meters
            'radius' : 10000,
            'epsg'   : 3857
        }
    }
    or
    inBuffers = {
        "lisbon" : {
            "path" : /path/to/file.shp,
            "epsg" : 3763
        }
    }

    keywords = ['flood', 'accident', 'fire apartment', 'graffiti', 'homeless']

    datasource = 'facebook' or datasource = 'flickr'
    TODO: Only works for Flickr and Facebook

    Side effects: (re)creates the PostgreSQL database `db`, writes several
    tables into it and two shapefiles per city into `workspace`.
    Returns the mutated inBuffers dict with the produced table names.
    """

    import os
    from osgeo import ogr
    from glass.pys import obj_to_lst
    from glass.ng.sql.db import create_db
    from glass.ng.sql.q import q_to_ntbl
    from glass.g.wt.sql import df_to_db
    from glass.g.it.db import shp_to_psql
    from glass.g.it.shp import dbtbl_to_shp
    from glass.g.gp.prox.bfing import get_sub_buffers, dic_buffer_array_to_shp

    # Pick the extraction function for the requested data source
    if datasource == 'flickr':
        from glass.g.acq.dsn.flickr import photos_location

    elif datasource == 'facebook':
        from glass.g.acq.dsn.fb.places import places_by_query

    keywords = obj_to_lst(keywords)
    # The sentinel string "None" means "search without a keyword"
    keywords = ["None"] if not keywords else keywords

    # Create Database to Store Data
    create_db(db, overwrite=True, api='psql')

    for city in inBuffers:
        # Get Smaller Buffers
        if "path" in inBuffers[city]:
            # Get X, Y and Radius from the given buffer file
            from glass.g.prop.feat.bf import bf_prop

            __bfprop = bf_prop(
                inBuffers[city]["path"], inBuffers[city]["epsg"], isFile=True
            )

            inBuffers[city]["x"] = __bfprop["X"]
            inBuffers[city]["y"] = __bfprop["Y"]
            inBuffers[city]["radius"] = __bfprop["R"]

        # Main buffer first ('major'), then the 8 cardinal sub-buffers
        inBuffers[city]["list_buffer"] = [{
            'X': inBuffers[city]["x"], 'Y': inBuffers[city]["y"],
            'RADIUS': inBuffers[city]['radius'], 'cardeal': 'major'
        }] + get_sub_buffers(
            inBuffers[city]["x"], inBuffers[city]["y"],
            inBuffers[city]["radius"]
        )

        # Smaller Buffers to File
        multiBuffer = os.path.join(workspace, 'buffers_{}.shp'.format(city))
        dic_buffer_array_to_shp(
            inBuffers[city]["list_buffer"], multiBuffer,
            inBuffers[city]['epsg'], fields={'cardeal': ogr.OFTString}
        )

        # Retrive data for each keyword and buffer
        # Record these elements in one dataframe
        c       = None  # flag: first successful extraction seen
        tblData = None
        for bf in inBuffers[city]["list_buffer"]:
            for k in keywords:
                if datasource == 'flickr':
                    tmpData = photos_location(
                        bf, inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsg_out=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False
                    )

                elif datasource == 'facebook':
                    tmpData = places_by_query(
                        bf, inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsgOut=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False
                    )

                # NOTE(review): an int return value is used by the
                # extractors as a no-data/error code - confirm.
                if type(tmpData) == int:
                    print("NoData finded for buffer '{}' and keyword '{}'".format(
                        bf['cardeal'], k
                    ))

                    continue

                # Tag rows with the keyword and the buffer they came from
                tmpData["keyword"]   = k
                tmpData["buffer_or"] = bf["cardeal"]

                if not c:
                    tblData = tmpData
                    c = 1
                else:
                    # NOTE(review): DataFrame.append was removed in
                    # pandas 2.x - this requires an older pandas.
                    tblData = tblData.append(tmpData, ignore_index=True)

        inBuffers[city]["data"] = tblData

        # Get data columns names (everything except geometry/tag columns)
        cols = inBuffers[city]["data"].columns.values
        dataColumns = [
            c for c in cols if c != 'geom' and c != 'keyword' \
                and c != 'buffer_or' and c != 'geometry'
        ]

        # Send data to PostgreSQL
        if 'geometry' in cols:
            cgeom = 'geometry'
        else:
            cgeom = 'geom'

        inBuffers[city]["table"] = 'tbldata_{}'.format(city)
        df_to_db(
            db, inBuffers[city]["data"], inBuffers[city]["table"],
            api='psql', epsg=inBuffers[city]["epsg"],
            geomType='POINT', colGeom=cgeom
        )

        # Send Buffers data to PostgreSQL
        inBuffers[city]["pg_buffer"] = shp_to_psql(
            db, multiBuffer, pgTable='buffers_{}'.format(city),
            api="shp2pgsql", srsEpsgCode=inBuffers[city]["epsg"]
        )

        # Points that intersect at least one buffer, with the list of
        # buffers they were extracted with and the list they intersect
        inBuffers[city]["filter_table"] = q_to_ntbl(
            db, "filter_{}".format(inBuffers[city]["table"]),
            ("SELECT srcdata.*, "
             "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
             "AS intersect_buffer FROM ("
                "SELECT {cols}, keyword, geom, "
                "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                "FROM {pgtable} "
                "GROUP BY {cols}, keyword, geom"
             ") AS srcdata, ("
                "SELECT cardeal, geom AS bfg FROM {bftable}"
             ") AS buffersg "
             "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS TRUE "
             "GROUP BY {cols}, keyword, geom, extracted_buffer").format(
                cols=", ".join(dataColumns),
                pgtable=inBuffers[city]["table"],
                bftable=inBuffers[city]["pg_buffer"]
            ), api='psql'
        )

        # Points that intersect none of the 9 buffers (array_length = 9
        # means "outside all of them")
        inBuffers[city]["outside_table"] = q_to_ntbl(
            db, "outside_{}".format(inBuffers[city]["table"]),
            ("SELECT * FROM ("
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS not_intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS NOT TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
            ") AS foo WHERE array_length(not_intersect_buffer, 1) = 9"
            ).format(
                cols=", ".join(dataColumns),
                pgtable=inBuffers[city]["table"],
                bftable=inBuffers[city]["pg_buffer"]
            ), api='psql'
        )

        # Union these two tables: points outside all buffers get an empty
        # ('{}') intersect_buffer array
        inBuffers[city]["table"] = q_to_ntbl(
            db, "data_{}".format(city),
            ("SELECT * FROM {intbl} UNION ALL "
             "SELECT {cols}, keyword, geom, extracted_buffer, "
             "CASE WHEN array_length(not_intersect_buffer, 1) = 9 "
             "THEN '{array_symbol}' ELSE not_intersect_buffer END AS "
             "intersect_buffer FROM {outbl}").format(
                intbl=inBuffers[city]["filter_table"],
                outbl=inBuffers[city]["outside_table"],
                cols=", ".join(dataColumns),
                array_symbol='{' + '}'
            ), api='psql'
        )

        """
        Get Buffers table with info related:
        -> pnt_obtidos = nr pontos obtidos usando esse buffer
        -> pnt_obtidos_fora = nt pontos obtidos fora desse buffer,
        mas obtidos com ele
        -> pnt_intersect = nt pontos que se intersectam com o buffer
        -> pnt_intersect_non_obtain = nr pontos que se intersectam mas
        nao foram obtidos como buffer
        """
        # (English: per-buffer counts of points obtained with it, obtained
        # outside it, intersecting it, and intersecting-but-not-obtained.)
        inBuffers[city]["pg_buffer"] = q_to_ntbl(
            db, "dt_{}".format(inBuffers[city]["pg_buffer"]),
            ("SELECT main.*, get_obtidos.pnt_obtidos, "
             "obtidos_fora.pnt_obtidos_fora, intersecting.pnt_intersect, "
             "int_not_obtained.pnt_intersect_non_obtain "
             "FROM {bf_table} AS main "
             "LEFT JOIN ("
                "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos "
                "FROM {bf_table} AS bf "
                "INNER JOIN {dt_table} AS dt "
                "ON bf.cardeal = ANY(dt.extracted_buffer) "
                "GROUP BY gid, cardeal"
             ") AS get_obtidos ON main.gid = get_obtidos.gid "
             "LEFT JOIN ("
                "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos_fora "
                "FROM {bf_table} AS bf "
                "INNER JOIN {dt_table} AS dt "
                "ON bf.cardeal = ANY(dt.extracted_buffer) "
                "WHERE ST_Intersects(bf.geom, dt.geom) IS NOT TRUE "
                "GROUP BY gid, cardeal"
             ") AS obtidos_fora ON main.gid = obtidos_fora.gid "
             "LEFT JOIN ("
                "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect "
                "FROM {bf_table} AS bf "
                "INNER JOIN {dt_table} AS dt "
                "ON bf.cardeal = ANY(dt.intersect_buffer) "
                "GROUP BY gid, cardeal"
             ") AS intersecting ON main.gid = intersecting.gid "
             "LEFT JOIN ("
                "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect_non_obtain "
                "FROM {bf_table} AS bf "
                "INNER JOIN {dt_table} AS dt "
                "ON bf.cardeal = ANY(dt.intersect_buffer) "
                "WHERE NOT (bf.cardeal = ANY(dt.extracted_buffer)) "
                "GROUP BY gid, cardeal"
             ") AS int_not_obtained "
             "ON main.gid = int_not_obtained.gid "
             "ORDER BY main.gid").format(
                bf_table=inBuffers[city]["pg_buffer"],
                dt_table=inBuffers[city]["table"]
            ), api='psql'
        )

        """
        Get Points table with info related:
        -> nobtido = n vezes um ponto foi obtido
        -> obtido_e_intersect = n vezes um ponto foi obtido usando um buffer
        com o qual se intersecta
        -> obtido_sem_intersect = n vezes um ponto foi obtido usando um buffer
        com o qual nao se intersecta
        -> nintersect = n vezes que um ponto se intersecta com um buffer
        -> intersect_sem_obtido = n vezes que um ponto nao foi obtido apesar
        de se intersectar com o buffer
        """
        # (English: per-point counts of how often it was obtained,
        # obtained-and-intersecting, obtained-without-intersecting,
        # intersecting, and intersecting-but-not-obtained.)
        inBuffers[city]["table"] = q_to_ntbl(
            db, "info_{}".format(city),
            ("SELECT {cols}, dt.keyword, dt.geom, "
             "CAST(dt.extracted_buffer AS text) AS extracted_buffer, "
             "CAST(dt.intersect_buffer AS text) AS intersect_buffer, "
             "array_length(extracted_buffer, 1) AS nobtido, "
             "SUM(CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                "THEN 1 ELSE 0 END) AS obtido_e_intersect, "
             "(array_length(extracted_buffer, 1) - SUM("
                "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                "THEN 1 ELSE 0 END)) AS obtido_sem_intersect, "
             "array_length(intersect_buffer, 1) AS nintersect, "
             "(array_length(intersect_buffer, 1) - SUM("
                "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                "THEN 1 ELSE 0 END)) AS intersect_sem_obtido "
             "FROM {dt_table} AS dt "
             "INNER JOIN {bf_table} AS bf "
             "ON bf.cardeal = ANY(dt.extracted_buffer) "
             "GROUP BY {cols}, dt.keyword, dt.geom, "
             "dt.extracted_buffer, dt.intersect_buffer").format(
                dt_table=inBuffers[city]["table"],
                bf_table=inBuffers[city]["pg_buffer"],
                cols=", ".join(["dt.{}".format(x) for x in dataColumns])
            ), api='psql'
        )

        # Export Results
        dbtbl_to_shp(
            db, inBuffers[city]["table"], 'geom', os.path.join(
                workspace, "{}.shp".format(inBuffers[city]["table"])
            ), api='psql', epsg=inBuffers[city]["epsg"]
        )

        dbtbl_to_shp(
            db, inBuffers[city]["pg_buffer"], 'geom', os.path.join(
                workspace, "{}.shp".format(inBuffers[city]["pg_buffer"])
            ), api='psql', epsg=inBuffers[city]["epsg"]
        )

    return inBuffers
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, DB,
                     GRASS_REGION_TEMPLATE):
    """
    Script to check differences between pairs of Feature Classes

    Suponha que temos diversas Feature Classes (FC) e que cada uma delas
    possui um determinado atributo; imagine tambem que,
    considerando todos os pares possiveis entre estas FC,
    se pretende comparar as diferencas na distribuicao dos valores
    desse atributo para cada par.

    * Dependencias:
    - GRASS;
    - PostgreSQL;
    - PostGIS.

    SHAPES_TO_COMPARE - {file_path: attribute_column} mapping; raster
    inputs are vectorized first.
    OUT_FOLDER - working folder; must NOT exist yet.
    REPORT - output spreadsheet path.
    DB - name of the PostgreSQL database to create and use.
    GRASS_REGION_TEMPLATE - file defining the GRASS location SRS/region.

    Returns REPORT.
    """

    import datetime
    import os
    import pandas
    from glass.ng.sql.q import q_to_obj
    from glass.ng.it import db_to_tbl
    from glass.g.wt.sql import df_to_db
    from glass.g.dp.rst.toshp import rst_to_polyg
    from glass.g.it.db import shp_to_psql
    from glass.g.dp.tomtx import tbl_to_area_mtx
    from glass.g.prop import check_isRaster
    from glass.pys.oss import fprop
    from glass.ng.sql.db import create_db
    from glass.ng.sql.tbl import tbls_to_tbl
    from glass.ng.sql.q import q_to_ntbl

    # Check if folder exists, if not create it
    if not os.path.exists(OUT_FOLDER):
        from glass.pys.oss import mkdir

        mkdir(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    # Start a GRASS session in a fresh 'shpdif' location
    from glass.g.wenv.grs import run_grass

    gbase = run_grass(
        OUT_FOLDER, grassBIN='grass78', location='shpdif',
        srs=GRASS_REGION_TEMPLATE
    )

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

    from glass.g.it.shp import shp_to_grs, grs_to_shp
    from glass.g.it.rst import rst_to_grs
    from glass.g.tbl.col import rn_cols

    # Convert to SHAPE if file is Raster
    # Normalize every input into a shapefile whose comparison column is
    # named "lulc_<i>"
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            # To GRASS
            rstName = fprop(s, 'fn')
            inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)

            # To Vector
            d = rst_to_polyg(
                inRst, rstName, rstColumn="lulc_{}".format(i),
                gisApi="grass"
            )

            # Export Shapefile
            shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'), "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            # To GRASS
            grsV = shp_to_grs(s, fprop(s, 'fn'), asCMD=True)

            # Change name of column with comparing value
            # NOTE(review): ncol is computed but never used.
            ncol = "lulc_{}".format(str(i))
            rn_cols(
                grsV, {SHAPES_TO_COMPARE[s]: "lulc_{}".format(str(i))},
                api="grass"
            )

            # Export
            shp = grs_to_shp(
                grsV, os.path.join(OUT_FOLDER, grsV + '_rn.shp'), "area"
            )

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(str(i))

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE
    __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database
    create_db(DB, api='psql')

    """ Union SHAPEs """

    UNION_SHAPE = {}
    FIX_GEOM = {}  # NOTE(review): never populated - dead variable.

    # Pairwise union of every two input shapes
    SHPS = list(__SHAPES_TO_COMPARE.keys())
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            # Optimized Union
            # NOTE(review): optimized_union_anls is not imported anywhere
            # in this function or the visible file - confirm it is a
            # module-level import elsewhere, otherwise this raises
            # NameError at runtime.
            print("Union between {} and {}".format(SHPS[i], SHPS[e]))
            time_a = datetime.datetime.now().replace(microsecond=0)
            __unShp = optimized_union_anls(
                SHPS[i], SHPS[e],
                os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                GRASS_REGION_TEMPLATE,
                os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                multiProcess=True
            )
            time_b = datetime.datetime.now().replace(microsecond=0)
            print(time_b - time_a)

            # Rename cols (the union prefixes them with a_/b_)
            unShp = rn_cols(__unShp, {
                "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                    __SHAPES_TO_COMPARE[SHPS[i]],
                "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                    __SHAPES_TO_COMPARE[SHPS[e]]
            })

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data to postgresql
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(DB, UNION_SHAPE[uShp], api='shp2pgsql')

        # Produce table with % of area equal in both maps
        areaMapTbl = q_to_ntbl(
            DB, "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
                "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
                "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
                "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
                "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
                "CASE "
                    "WHEN {map1_cls} = {map2_cls} "
                    "THEN 1 ELSE 0 "
                "END AS isthesame, total_area FROM {tbl}, ("
                    "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
                ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                lulc_1=fprop(uShp[0], 'fn'), lulc_2=fprop(uShp[1], 'fn'),
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                tbl=union_tbl
            ), api='psql'
        )

        # Produce confusion matrix for the pair in comparison
        matrixTbl = tbl_to_area_mtx(
            DB, union_tbl,
            __SHAPES_TO_COMPARE[uShp[0]],
            __SHAPES_TO_COMPARE[uShp[1]],
            union_tbl + '_mtx'
        )

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(
        DB, [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL], 'total_table'
    )

    # Create table with % of agreement between each pair of maps
    mapsNames = q_to_obj(
        DB,
        ("SELECT lulc FROM ("
            "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
            "UNION ALL "
            "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
        ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql'
    ).lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    # crosstab pivot (requires the PostgreSQL tablefunc extension):
    # symmetric full matrix of pairwise values, zero-filled on the diagonal
    Q = (
        "SELECT * FROM crosstab('"
            "SELECT CASE "
                "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
            "END AS lulc_1, CASE "
                "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
            "END AS lulc_2, CASE "
                "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
            "END AS agree_percentage FROM ("
                "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
                "SELECT lulc_1, lulc_2, {valCol} FROM ("
                    "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
                    "FROM {tbl}"
                ") AS tst"
            ") AS foo FULL JOIN ("
                "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
                    "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
                    "UNION ALL "
                    "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
                ") AS tst_1, ("
                    "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
                    "UNION ALL "
                    "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
                ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
            ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
            "ORDER BY lulc_1, lulc_2"
        "') AS ct("
            "lulc_map text, {crossCols}"
        ")"
    )

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    # First field produces the agreement table, second the area table
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = q_to_ntbl(
                DB, "agreement_table", Q.format(
                    tbl=total_table, valCol=f,
                    crossCols=", ".join([
                        "{} numeric".format(map_) for map_ in mapsNames
                    ])
                ), api='psql'
            )
        else:
            TOTAL_AREA_TABLE = q_to_ntbl(
                DB, "area_table", Q.format(
                    tbl=total_table, valCol=f,
                    crossCols=", ".join([
                        "{} numeric".format(map_) for map_ in mapsNames
                    ])
                ), api='psql'
            )

    # Union Mapping: which union shapefile corresponds to which input pair
    UNION_MAPPING = pandas.DataFrame(
        [[k[0], k[1], fprop(UNION_SHAPE[k], 'fn')] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp']
    )

    UNION_MAPPING = df_to_db(DB, UNION_MAPPING, 'union_map', api='psql')

    # Export Results: one sheet per summary table plus one per pair matrix
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(
            fprop(x[0], 'fn')[:15], fprop(x[1], 'fn')[:15]
        ) for x in SYNTH_TBL
    ]

    db_to_tbl(
        DB, ["SELECT * FROM {}".format(x) for x in TABLES],
        REPORT, sheetsNames=SHEETS, dbAPI='psql'
    )

    return REPORT
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell.

    GRID_PNT - point shapefile; each point is the centre of a search cell.
    EPSG - SRS of GRID_PNT (and of the output).
    RADIUS - search radius around each point (passed to the data source).
    DATA_SOURCE - only 'facebook' is supported.
    db - name of the PostgreSQL database to (re)create and use.
    OUTPUT_TABLE - path of the output shapefile.

    Returns OUTPUT_TABLE.
    Raises ValueError for an unsupported DATA_SOURCE.
    """

    import time
    from glass.g.rd.shp import shp_to_obj
    from glass.ng.sql.db import create_db
    from glass.g.acq.dsn.fb.places import places_by_query
    from glass.g.prj.obj import df_prj
    from glass.ng.pd import merge_df
    from glass.g.it.shp import dbtbl_to_shp
    from glass.ng.sql.q import q_to_ntbl
    from glass.g.wt.sql import df_to_db

    # Open GRID SHP; the DSN API expects WGS84 coordinates
    GRID_DF = shp_to_obj(GRID_PNT)
    GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF

    GRID_DF["lng"] = GRID_DF.geometry.x.astype(float)
    GRID_DF["lat"] = GRID_DF.geometry.y.astype(float)
    GRID_DF["grid_id"] = GRID_DF.index

    # GET DATA
    RESULTS = []

    def get_data(row, datasrc):
        # Query the data source for one cell; int return means no data
        if datasrc == 'facebook':
            d = places_by_query(
                {'x': row.lng, 'y': row.lat, 'r': RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )

        else:
            raise ValueError(
                '{} as datasource is not a valid value'.format(datasrc))

        if type(d) == int:
            return

        d['grid_id'] = row.grid_id

        RESULTS.append(d)

        # Throttle requests to be gentle with the API
        time.sleep(5)

    GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1)

    RT = merge_df(RESULTS)

    # Create DB
    create_db(db, overwrite=True, api='psql')

    # Send Data to PostgreSQL.
    # FIX: epsg and geomType are now passed by keyword; the original call
    # passed EPSG and "POINT" positionally, which mapped them to
    # df_to_db's append and api parameters (every sibling call site in
    # this file uses epsg=/geomType= keywords).
    df_to_db(
        db, RT, "{}_data".format(DATA_SOURCE), api='psql',
        epsg=EPSG, geomType="POINT",
        colGeom='geometry' if 'geometry' in RT.columns.values else 'geom'
    )

    # Group identical records that were found from several cells,
    # aggregating the cell ids into one text column
    COLS = [
        x for x in RT.columns.values if x != "geometry" and \
            x != 'geom' and x != "grid_id"
    ] + ["geom"]

    GRP_BY_TBL = q_to_ntbl(
        db, "{}_grpby".format(DATA_SOURCE),
        ("SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
         "FROM {dtsrc}_data GROUP BY {cols}").format(
            cols=", ".join(COLS), dtsrc=DATA_SOURCE
        ), api='psql'
    )

    dbtbl_to_shp(db, GRP_BY_TBL, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG)

    return OUTPUT_TABLE
def psql_to_djgdb(sql_dumps, db_name, djg_proj=None, mapTbl=None,
                  userDjgAPI=None):
    """
    Import PGSQL database in a SQL Script into the database
    controlled by one Django Project

    To work, the name of a model instance of type foreign key should be
    equal to the name of the 'db_column' clause.

    sql_dumps  - SQL dump file or list of files to restore.
    db_name    - target database managed by the Django project.
    djg_proj   - path to the Django project (used to set up Django).
    mapTbl     - optional explicit list of tables to import (skips the
                 table listing of the temporary database).
    userDjgAPI - when truthy, rows are inserted through the Django ORM;
                 otherwise they are bulk-copied with pandas/df_to_db.
    """

    import os
    from glass.pys import __import
    from glass.pys import obj_to_lst
    from glass.ng.sql.db import restore_tbls
    from glass.ng.sql.db import create_db, drop_db
    from glass.ng.prop.sql import lst_tbl
    from glass.ng.sql.q import q_to_obj
    from glass.webg.djg.mdl.rel import order_mdl_by_rel
    from glass.webg.djg.mdl.i import lst_mdl_proj

    # Global variables
    # PostGIS / pointcloud system tables that must never be copied
    TABLES_TO_EXCLUDE = [
        'geography_columns', 'geometry_columns',
        'spatial_ref_sys', 'raster_columns', 'raster_columns',
        'raster_overviews', 'pointcloud_formats', 'pointcloud_columns'
    ]

    # Several SQL Files are expected
    sql_scripts = obj_to_lst(sql_dumps)

    # Create Database
    # Restore everything into a throwaway database first
    tmp_db_name = db_name + '_xxxtmp'
    create_db(tmp_db_name)

    # Restore tables in SQL files
    for sql in sql_scripts:
        restore_tbls(tmp_db_name, sql)

    # List tables in the database
    tables = [x for x in lst_tbl(
        tmp_db_name, excludeViews=True, api='psql'
    )] if not mapTbl else mapTbl

    # Open Django Project (sets up Django so models can be imported)
    if djg_proj:
        from glass.webg.djg import get_djgprj
        application = get_djgprj(djg_proj)

    # List models in project
    app_mdls = lst_mdl_proj(djg_proj, thereIsApp=True, returnClassName=True)

    # Map each table to its "app.models.Class" dotted path; Django's own
    # auth tables keep their plain name, other django/auth tables and
    # system tables are skipped
    data_tbl = {}
    for t in tables:
        if t == 'auth_user' or t == 'auth_group' or t == 'auth_user_groups':
            data_tbl[t] = t

        elif t.startswith('auth') or t.startswith('django'):
            continue

        elif t not in app_mdls or t in TABLES_TO_EXCLUDE:
            continue

        else:
            data_tbl["{}.models.{}".format(t.split('_')[0], app_mdls[t])] = t

    from django.contrib.gis.db import models

    # Order models so that referenced tables are restored before the
    # tables that reference them
    mdl_cls = ["{}.models.{}".format(
        m.split('_')[0], app_mdls[m]) for m in app_mdls]
    orderned_table = order_mdl_by_rel(mdl_cls)

    # Add default tables of Django (restored first)
    def_djg_tbl = []
    if 'auth_group' in data_tbl:
        def_djg_tbl.append('auth_group')

    if 'auth_user' in data_tbl:
        def_djg_tbl.append('auth_user')

    if 'auth_user_groups' in data_tbl:
        def_djg_tbl.append('auth_user_groups')

    orderned_table = def_djg_tbl + orderned_table

    if userDjgAPI:
        # Row-by-row restore through the Django ORM
        for table in orderned_table:
            # Map pgsql table data
            tableData = q_to_obj(tmp_db_name, data_tbl[table], of='dict')

            # Table data to Django Model
            if table == 'auth_user':
                mdl_cls = __import('django.contrib.auth.models.User')
            elif table == 'auth_group':
                mdl_cls = __import('django.contrib.auth.models.Group')
            else:
                mdl_cls = __import(table)

            # NOTE(review): a single model instance is created here and
            # reused for every row - unless save() assigns a new primary
            # key each time, later rows may overwrite earlier ones.
            # Confirm intended behavior.
            __mdl = mdl_cls()

            for row in tableData:
                for col in row:
                    # Check if field is a foreign key
                    field_obj = mdl_cls._meta.get_field(col)

                    if not isinstance(field_obj, models.ForeignKey):
                        # If not, use the value
                        # But first check if value is nan
                        # (special type of float)
                        if row[col] != row[col]:
                            row[col] = None

                        setattr(__mdl, col, row[col])
                    else:
                        # If yes, use the model instance of the
                        # related table
                        # Get the model of the table related to the one
                        # whose data is being restored
                        related_name = field_obj.related_model.__name__
                        related_model = __import('{a}.models.{m}'.format(
                            a=table.split('_')[0], m=related_name
                        ))

                        # If NULL, continue
                        if not row[col]:
                            setattr(__mdl, col, row[col])
                            continue

                        related_obj = related_model.objects.get(
                            pk=int(row[col])
                        )

                        setattr(__mdl, col, related_obj)
                __mdl.save()
    else:
        # Bulk restore: copy each table with pandas straight into the
        # Django-managed database
        import pandas as pd
        from glass.ng.sql.q import q_to_obj
        from glass.g.wt.sql import df_to_db

        for tbl in orderned_table:
            if tbl not in data_tbl:
                continue

            data = q_to_obj(
                tmp_db_name, "SELECT * FROM {}".format(data_tbl[tbl]))

            if tbl == 'auth_user':
                # Django stores these as timezone-aware timestamps
                data['last_login'] = pd.to_datetime(data.last_login, utc=True)
                data['date_joined'] = pd.to_datetime(data.date_joined, utc=True)

            df_to_db(db_name, data, data_tbl[tbl], append=True)

    # The temporary restore database is no longer needed
    drop_db(tmp_db_name)
def run_viewshed_by_cpu(tid, db, obs, dem, srs, vis_basename='vis',
    maxdst=None, obselevation=None):
    """
    Worker: compute a viewshed for every observer point in *obs* and store
    the visible cell-index intervals in a per-worker PostgreSQL database.

    Parameters
    ----------
    tid : int
        Worker/thread id; used to suffix the database name and workspace.
    db : str
        Base database name; this worker writes to "<db>_<tid>".
    obs : GeoDataFrame
        Observer points — assumes point geometries in column 'geometry'
        and an id column referenced by the module-level name `obs_id`
        (TODO confirm: `obs_id` is not a parameter and is not defined in
        this function; it must come from module scope).
    dem : str
        Path to the DEM raster used both as elevation source and as the
        spatial reference for the GRASS location.
    srs : int
        EPSG code of the observer points.
    vis_basename : str
        Base name for viewshed rasters and the output table.
    maxdst : optional
        Maximum viewshed distance (forwarded to grs_viewshed).
    obselevation : optional
        Observer elevation (forwarded to grs_viewshed).
    """

    # Create the per-worker database
    new_db = create_db("{}_{}".format(db, str(tid)), api='psql')

    # Points to Database
    # NOTE(review): the returned table name is not used afterwards.
    pnt_tbl = df_to_db(
        new_db, obs, 'pnt_tbl', api='psql',
        epsg=srs, geomType='Point', colGeom='geometry')

    # Create GRASS GIS Session workspace next to the DEM
    workspace = mkdir(os.path.join(
        os.path.dirname(dem), 'work_{}'.format(str(tid))
    ))

    loc_name = 'vis_loc'
    gbase = run_grass(workspace, location=loc_name, srs=dem)

    # Start GRASS GIS Session
    # NOTE(review): `grass` is imported but apparently unused here.
    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(gbase, workspace, loc_name, 'PERMANENT')

    from glass.g.it.rst import rst_to_grs, grs_to_rst
    from glass.g.rst.surf import grs_viewshed
    from glass.g.deldt import del_rst

    # Send DEM to GRASS GIS
    grs_dem = rst_to_grs(dem, 'grs_dem', as_cmd=True)

    # Produce Viewshed for each point in obs
    for idx, row in obs.iterrows():
        # Get Viewshed raster (1 = visible cell)
        vrst = grs_viewshed(
            grs_dem, (row.geometry.x, row.geometry.y),
            '{}_{}'.format(vis_basename, str(row[obs_id])),
            max_dist=maxdst, obs_elv=obselevation
        )

        # Export Raster to File
        frst = grs_to_rst(vrst, os.path.join(workspace, vrst + '.tif'))

        # Raster to Array
        img = gdal.Open(frst)
        num = img.ReadAsArray()

        # Two Dimension to One Dimension
        # Reshape Array (flatten row-major: index = r * ncols + c)
        numone = num.reshape(num.shape[0] * num.shape[1])

        # Get flat indexes of the cells with visibility
        visnum = np.arange(numone.shape[0]).astype(np.uint32)
        visnum = visnum[numone == 1]

        # Compress the visible indexes into [start, end] intervals
        # (assumes get_minmax_fm_seq_values returns an (n, 2) array —
        # TODO confirm)
        visint = get_minmax_fm_seq_values(visnum)

        # Get rows indexes (flat index // ncols; the float division is
        # floored by the uint32 cast below)
        _visint = visint.reshape(visint.shape[0] * visint.shape[1])
        visrow = _visint / num.shape[1]
        visrow = visrow.astype(np.uint32)

        # Get cols indexes (flat index - row * ncols)
        viscol = _visint - (visrow * num.shape[1])

        # Reshape back to (n, 2) interval pairs
        visrow = visrow.reshape(visint.shape)
        viscol = viscol.reshape(visint.shape)

        # Split array: transpose so the two columns (start/end) unpack into
        # separate 1-D arrays
        irow, erow = np.vsplit(visrow.T, 1)[0]
        icol, ecol = np.vsplit(viscol.T, 1)[0]

        # Visibility indexes to Pandas DataFrame, tagged with the point id
        idxnum = np.full(irow.shape, row[obs_id])

        visdf = pd.DataFrame({
            'pntid' : idxnum, 'rowi' : irow, 'rowe' : erow,
            'coli': icol, 'cole' : ecol
        })

        # Pandas DF to database
        # Create Visibility table on the first iteration, append afterwards
        # (assumes obs has a default RangeIndex so idx == 0 first —
        # TODO confirm)
        df_to_db(
            new_db, visdf, vis_basename,
            api='psql', colGeom=None,
            append=None if not idx else True
        )

        # Delete all variables to release memory before the next point
        numone = None
        visnum = None
        visint = None
        _visint = None
        visrow = None
        viscol = None
        irow = None
        erow = None
        icol = None
        ecol = None
        idxnum = None
        visdf = None
        del img

        # Delete GRASS GIS File
        del_rst(vrst)

        # Delete TIFF File
        del_file(frst)
        frst = None
def search_by_keyword(db, out_tbl, qarea, wgrp=None): """ Get data using keywords """ import os import pandas as pd from multiprocessing import Process, Manager from glass.cons.dsn import search_words, tw_key from glass.ng.pd import merge_df from glass.ng.pd.split import df_split from glass.g.wt.sql import df_to_db # Get API Keys keys = tw_key() # Get search words words = search_words(group=wgrp) # Split search words search_words = [words] if len(keys) == 1 else df_split(words, len(keys)) # Search for data with Manager() as manager: DFS = manager.list() LOG_LST = manager.list() DROP_COLS = ["retweeted"] # Create Threads thrds = [Process( name='tk{}'.format(str(i)), target=get_tweets, args=(DFS, LOG_LST, search_words[i], qarea, keys[i], DROP_COLS, i) ) for i in range(len(search_words))] for t in thrds: t.start() for t in thrds: t.join() if not len(DFS): raise ValueError('NoData was collected!') # Merge all dataframes if len(DFS) == 1: all_df = DFS[0] else: all_df = merge_df(DFS, ignIndex=True, ignoredfstype=True) all_df.rename(columns={"user" : "username"}, inplace=True) # Sanitize time reference all_df['daytime'] = pd.to_datetime(all_df.tweet_time) all_df.daytime = all_df.daytime.astype(str) all_df.daytime = all_df.daytime.str.slice(start=0, stop=-6) all_df.drop('tweet_time', axis=1, inplace=True) # Rename cols all_df.rename(columns={ 'text' : 'txt', 'tweet_lang' : 'tlang', 'user_id' : 'userid', 'user_location' : 'userloc', 'place_country' : 'placecountry', 'place_countryc' : 'placecountryc', 'place_name' : 'placename', 'place_box' : 'placebox', 'place_id' : 'placeid', 'followers_count' : 'followersn' }, inplace=True) # Data to new table df_to_db(db, all_df, out_tbl, append=True, api='psql') # Write log file log_txt = os.path.join( os.path.dirname(os.path.abspath(__file__)), '{}-log.txt'.format(out_tbl) ) with open(log_txt, 'w') as f: f.write("\n".join(LOG_LST)) return log_txt
os.path.join(workspace, 'fgrid_{}_{}.shp'.format(row[idcol], str(i + 1))), 'area') clpfnet = shp_to_obj(clp_ffnet, outgeom='geom') clpfnet = del_cols_notin_ref(clpfnet, list(cols_b.keys()), geomCol="geom") clpfnet.rename(columns=cols_b, inplace=True) main_df = main_df.append(clpfnet, ignore_index=True, sort=False) main_df['id'] = main_df.index main_df['areag'] = main_df.geom.area main_df = main_df[main_df.areag > 1] main_df.drop(['sid', 'areag'], axis=1, inplace=True) # Send data to database df_to_db(db, main_df, grid_tbl, append=True, api='psql', epsg=epsg, geomType="Polygon", colGeom='geom', dbset=setdb) obj_to_shp(main_df, 'geom', epsg, out_grid)
def shp_to_psql(dbname, shpData, pgTable=None, api="pandas", mapCols=None,
                srsEpsgCode=None, encoding="UTF-8", dbset='default'):
    """
    Send Shapefile(s) to PostgreSQL.

    api options:
    * "pandas"    - GeoPandas API will be used;
    * "shp2pgsql" - shp2pgsql tool will be used.

    Parameters
    ----------
    dbname : str
        Target database name.
    shpData : str | list
        A shapefile path, a list of paths, or a folder (every *.shp inside
        the folder is imported).
    pgTable : str | list, optional
        Target table name(s); by default the (lowercased) filename is used.
        When a single name is given for several shapefiles, a "_<i>" suffix
        is appended per file.
    mapCols : dict, optional
        Column rename mapping (pandas api only); unmapped columns are
        lowercased.
    srsEpsgCode : int, optional
        EPSG code of the data; when omitted it is read from each shapefile.
    encoding : str
        Character encoding passed to shp2pgsql (-W).
    dbset : str
        Database connection set used by psql_cmd.

    Returns
    -------
    str | list
        The created table name, or a list of names when several shapefiles
        were imported.

    Raises
    ------
    ValueError
        If srsEpsgCode is not an integer, api is unknown, an EPSG code
        cannot be determined, or (pandas api) no geometry column is found.
    """

    import os
    from glass.pys.oss import fprop
    from glass.g.prop.prj import get_shp_epsg

    # If defined, srsEpsgCode must be an integer value
    # (isinstance instead of type() comparison)
    if srsEpsgCode and not isinstance(srsEpsgCode, int):
        raise ValueError('srsEpsgCode should be an integer value')

    if api == "pandas":
        from glass.ng.rd import tbl_to_obj
        from glass.g.wt.sql import df_to_db
        from glass.g.prop.feat import get_gtype
    elif api == "shp2pgsql":
        from glass.pys import execmd
        from glass.ng.sql import psql_cmd
        from glass.pys.oss import del_file
    else:
        raise ValueError(
            'api value is not valid. options are: pandas and shp2pgsql')

    # Check if shp is folder
    if os.path.isdir(shpData):
        from glass.pys.oss import lst_ff

        shapes = lst_ff(shpData, file_format='.shp')
    else:
        from glass.pys import obj_to_lst

        shapes = obj_to_lst(shpData)

    # One EPSG per shapefile, unless the caller fixed it for all of them
    epsgs = [
        get_shp_epsg(i) for i in shapes
    ] if not srsEpsgCode else [srsEpsgCode]

    if None in epsgs:
        raise ValueError((
            "Cannot obtain EPSG code. Use the srsEpsgCode parameter "
            "to specify the EPSG code of your data."
        ))

    tables = []
    for _i in range(len(shapes)):
        # Get Table name: explicit name(s) when given, filename otherwise
        if not pgTable:
            tname = fprop(shapes[_i], 'fn', forceLower=True)
        elif isinstance(pgTable, list):
            tname = pgTable[_i]
        elif len(shapes) == 1:
            tname = pgTable
        else:
            tname = pgTable + '_{}'.format(_i + 1)

        # Import data
        if api == "pandas":
            # SHP to DataFrame
            df = tbl_to_obj(shapes[_i])

            if not mapCols:
                df.rename(columns={
                    x: x.lower() for x in df.columns.values
                }, inplace=True)
            else:
                renameD = {
                    x: mapCols[x].lower() if x in mapCols else x.lower()
                    for x in df.columns.values
                }
                df.rename(columns=renameD, inplace=True)

            if "geometry" in df.columns.values:
                geomCol = "geometry"
            elif "geom" in df.columns.values:
                geomCol = "geom"
            else:
                raise ValueError("No Geometry found in shp")

            # GeoDataFrame to PSQL
            df_to_db(
                dbname, df, tname, append=True, api='psql',
                epsg=epsgs[_i] if not srsEpsgCode else srsEpsgCode,
                colGeom=geomCol,
                geomType=get_gtype(
                    shapes[_i], name=True, py_cls=False, gisApi='ogr')
            )
        else:
            sql_script = os.path.join(
                os.path.dirname(shapes[_i]), tname + '.sql')

            cmd = (
                'shp2pgsql -I -s {epsg} -W {enc} '
                '{shp} public.{name} > {out}'
            ).format(
                epsg=epsgs[_i] if not srsEpsgCode else srsEpsgCode,
                shp=shapes[_i], name=tname, out=sql_script,
                enc=encoding
            )

            # Run shp2pgsql (output value not needed), then load the script
            execmd(cmd)

            psql_cmd(dbname, sql_script, dbcon=dbset)

            del_file(sql_script)

        tables.append(tname)

    return tables[0] if len(tables) == 1 else tables