def rstfld_to_slope(rst_folder, dclv_folder, out_name, perc_folder=None):
    """
    Run slope for each raster in folder
    """

    import os
    import pandas as pd
    import re
    import multiprocessing as mp

    from glass.g.wenv.grs import run_grass
    from glass.pys.oss import cpu_cores, lst_ff
    from glass.pys.oss import fprop
    from glass.ng.pd.split import df_split

    def run_slope(tid, inrsts, outfolder, oname, percentage):
        """
        Thread function
        """

        iirsts = inrsts.mdt.tolist()

        # Create GRASS GIS Location
        loc_name = f'thread_{str(tid)}'
        gbase = run_grass(outfolder, location=loc_name, srs=iirsts[0])

        # Start GRASS GIS Session
        import grass.script as grass
        import grass.script.setup as gsetup

        gsetup.init(gbase, outfolder, loc_name, 'PERMANENT')

        from glass.g.it.rst import rst_to_grs, grs_to_rst
        from glass.g.rst.surf import slope
        from glass.g.wenv.grs import rst_to_region

        for rst in iirsts:
            # Import data
            mdt = rst_to_grs(rst, fprop(rst, 'fn'))

            # Set region
            rst_to_region(mdt)

            # Get ID in name
            mdt_id = re.search(r'\d+', mdt).group()

            # Get slope
            if percentage:
                slope_perc = slope(
                    mdt, f"pp_{oname}_{mdt_id}", data='percent'
                )

            slope_degr = slope(mdt, f"{oname}_{mdt_id}", data='degrees')

            # Export
            if percentage:
                grs_to_rst(slope_perc, os.path.join(
                    percentage, slope_perc + '.tif'
                ))

            grs_to_rst(slope_degr, os.path.join(
                outfolder, slope_degr + '.tif'
            ))

    # List Rasters
    rsts = pd.DataFrame(
        lst_ff(rst_folder, file_format='.tif'),
        columns=['mdt']
    )

    # Split rasters by threads
    ncpu = cpu_cores()
    dfs = df_split(rsts, ncpu)

    # Run slope using multiprocessing
    thrds = [mp.Process(
        target=run_slope, name=f"th_{str(i)}",
        args=(i + 1, dfs[i], dclv_folder, out_name, perc_folder)
    ) for i in range(len(dfs))]

    for t in thrds:
        t.start()

    for t in thrds:
        t.join()

    return dclv_folder
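# Usage sketch for rstfld_to_slope (hedged): the folder paths below are
# hypothetical placeholders, not from the original module. It assumes
# rst_folder holds DEM tiles as .tif files whose names carry a numeric id.
if __name__ == '__main__':
    rstfld_to_slope(
        '/tmp/dem_tiles',                  # folder with input DEM rasters (.tif)
        '/tmp/slope_degrees',              # output folder for slope in degrees
        'slope',                           # basename for the slope rasters
        perc_folder='/tmp/slope_percent'   # optional folder for slope in percent
    )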
def thrd_dem(countours_folder, ref_folder, dem_folder, attr,
             refFormat='.tif', countoursFormat='.shp', demFormat='.tif',
             cellsize=10, masksFolder=None, masksFormat='.tif',
             method="CONTOUR"):
    """
    Produce DEM using GRASS GIS for all Feature Classes in countours_folder

    E.g. countours_folder
    1 | data_1.shp
    2 | data_2.shp

    E.g. ref_folder
    1 | lmt_dem_1.tif
    2 | lmt_dem_2.tif

    Filenames must have their id before the extension;
    '_' must be used to separate the id from the basename.

    Methods Available:
    * IDW;
    * BSPLINE;
    * SPLINE;
    * CONTOUR;
    """

    import os
    import multiprocessing as mp
    import pandas as pd

    from glass.pys.oss import cpu_cores, lst_ff, mkdir
    from glass.ng.pd.split import df_split

    # List Ref Files
    ref = [[
        int(l.split('.')[0].split('_')[-1]), l
    ] for l in lst_ff(
        ref_folder, file_format=refFormat, rfilename=True
    )]

    # List Countours Files
    countours = [[
        int(c.split('.')[0].split('_')[-1]), c
    ] for c in lst_ff(
        countours_folder, file_format=countoursFormat, rfilename=True
    )]

    # List masks if necessary
    masks = None if not masksFolder else [[
        int(m.split('.')[0].split('_')[-1]), m
    ] for m in lst_ff(
        masksFolder, file_format=masksFormat, rfilename=True
    )]

    # Produce DataFrame for better mapping
    df = pd.DataFrame(ref, columns=['fid', 'ref'])
    jdf = pd.DataFrame(countours, columns=['jfid', 'countours'])

    df = df.merge(jdf, how='left', left_on='fid', right_on='jfid')

    # Add masks meta to df
    if masksFolder:
        mdf = pd.DataFrame(masks, columns=['mfid', 'masks'])
        df = df.merge(mdf, how='left', left_on='fid', right_on='mfid')

    # List DEMs already produced
    dems = lst_ff(dem_folder, file_format=demFormat, rfilename=True)

    # Delete rows when DEM already exists
    def check_dem_exists(row):
        # Get DEM name
        dem_f = 'dem_{}{}'.format(str(row.fid), demFormat)

        row['exists'] = 1 if dem_f in dems else 0

        return row

    df = df.apply(lambda x: check_dem_exists(x), axis=1)
    df = df[df.exists == 0]

    # Split Dfs
    n_cpu = cpu_cores()
    dfs = df_split(df, n_cpu)

    # Function to produce DEM
    def prod_dem(_df):
        for idx, row in _df.iterrows():
            # Get DEM name
            dem_f = 'dem_{}{}'.format(str(row.fid), demFormat)

            # Get GRASS GIS Workspace
            gw = mkdir(os.path.join(
                ref_folder, 'gw_{}'.format(str(row.fid))
            ), overwrite=True)

            # Get mask
            msk = None if not masksFolder else None if pd.isna(row.masks) \
                else os.path.join(masksFolder, row.masks)

            # Produce DEM
            # make_dem is expected to be available in this module's namespace
            make_dem(
                gw, os.path.join(countours_folder, row.countours), attr,
                os.path.join(dem_folder, dem_f),
                os.path.join(ref_folder, row.ref),
                method=method, cell_size=cellsize, mask=msk
            )

    # Produce DEM
    thrds = [mp.Process(
        target=prod_dem, name='th-{}'.format(str(i + 1)),
        args=(dfs[i],)
    ) for i in range(len(dfs))]

    for t in thrds:
        t.start()

    for t in thrds:
        t.join()
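# Usage sketch for thrd_dem (hedged): paths and the 'altitude' attribute
# name are hypothetical. Following the docstring convention, the contours
# folder would hold data_1.shp, data_2.shp, ... and the reference folder
# lmt_dem_1.tif, lmt_dem_2.tif, ...; dem_1.tif, dem_2.tif, ... are then
# written to the DEM folder.
if __name__ == '__main__':
    thrd_dem(
        '/tmp/contours',   # countours_folder: data_1.shp, data_2.shp, ...
        '/tmp/ref',        # ref_folder: lmt_dem_1.tif, lmt_dem_2.tif, ...
        '/tmp/dems',       # dem_folder: output dem_<id>.tif files
        'altitude',        # attribute holding the elevation values
        cellsize=10, method="CONTOUR"
    )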
def get_not_used_tags(OSM_FILE, OUT_TBL):
    """
    Use a file OSM to detect tags not considered in the OSM2LULC procedure
    """

    import os

    from glass.ng.wt import obj_to_tbl
    from glass.g.tbl.filter import sel_by_attr
    from glass.ng.sql.q import q_to_obj
    from glass.ng.pd.split import df_split
    from glass.pys.oss import fprop
    from glass.g.it.osm import osm_to_gpkg

    OSM_TAG_MAP = {
        "DB"        : os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'osmtolulc.sqlite'
        ),
        "OSM_FEAT"  : "osm_features",
        "KEY_COL"   : "key",
        "VALUE_COL" : "value",
        "GEOM_COL"  : "geom"
    }

    WORKSPACE = os.path.dirname(OUT_TBL)

    sqdb = osm_to_gpkg(OSM_FILE, os.path.join(
        WORKSPACE, fprop(OSM_FILE, 'fn') + '.gpkg'
    ))

    # Get Features we are considering
    ourOSMFeatures = q_to_obj(OSM_TAG_MAP["DB"], (
        "SELECT {key} AS key_y, {value} AS value_y, {geom} AS geom_y "
        "FROM {tbl}"
    ).format(
        key=OSM_TAG_MAP["KEY_COL"], value=OSM_TAG_MAP["VALUE_COL"],
        geom=OSM_TAG_MAP["GEOM_COL"], tbl=OSM_TAG_MAP["OSM_FEAT"]
    ), db_api='sqlite')

    # Get Features in File
    TABLES_TAGS = {
        'points'        : ['highway', 'man_made', 'building'],
        'lines'         : ['highway', 'waterway', 'aerialway', 'barrier',
                           'man_made', 'railway'],
        'multipolygons' : ['aeroway', 'amenity', 'barrier', 'building',
                           'craft', 'historic', 'land_area', 'landuse',
                           'leisure', 'man_made', 'military', 'natural',
                           'office', 'place', 'shop', 'sport', 'tourism',
                           'waterway', 'power', 'railway', 'healthcare',
                           'highway']
    }

    Qs = [
        " UNION ALL ".join([(
            "SELECT '{keycol}' AS key, {keycol} AS value, "
            "'{geomtype}' AS geom FROM {tbl} WHERE "
            "{keycol} IS NOT NULL"
        ).format(
            keycol=c,
            geomtype='Point' if table == 'points' else 'Line'
                if table == 'lines' else 'Polygon',
            tbl=table
        ) for c in TABLES_TAGS[table]]) for table in TABLES_TAGS
    ]

    fileOSMFeatures = q_to_obj(sqdb, (
        "SELECT key, value, geom FROM ({}) AS foo "
        "GROUP BY key, value, geom"
    ).format(" UNION ALL ".join(Qs)), db_api='sqlite')

    _fileOSMFeatures = fileOSMFeatures.merge(
        ourOSMFeatures, how='outer',
        left_on=["key", "value", "geom"],
        right_on=["key_y", "value_y", "geom_y"]
    )

    # Select OSM Features of file without correspondence
    _fileOSMFeatures["isnew"] = _fileOSMFeatures.key_y.fillna(value='nenhum')

    newTags = _fileOSMFeatures[_fileOSMFeatures.isnew == 'nenhum'].copy()

    newTags["value"] = newTags.value.str.replace("'", "''")

    newTags["whr"] = newTags.key + "='" + newTags.value + "'"

    # Export tags not being used to new shapefile
    def to_regular_str(row):
        # Placeholder sanitization hook; keeps the WHERE clause unchanged
        san_str = row.whr

        row["whr_san"] = san_str

        return row

    for t in TABLES_TAGS:
        if t == 'points':
            filterDf = newTags[newTags.geom == 'Point']

        elif t == 'lines':
            filterDf = newTags[newTags.geom == 'Line']

        elif t == 'multipolygons':
            filterDf = newTags[newTags.geom == 'Polygon']

        if filterDf.shape[0] > 500:
            dfs = df_split(filterDf, 500, nrows=True)
        else:
            dfs = [filterDf]

        # Query template; the WHERE clause is filled for each chunk below
        Q = "SELECT * FROM {} WHERE {}"

        i = 1
        for df in dfs:
            fn = t + '.shp' if len(dfs) == 1 else '{}_{}.shp'.format(
                t, str(i)
            )

            try:
                shp = sel_by_attr(sqdb, Q.format(
                    t, df.whr.str.cat(sep=" OR ")
                ), os.path.join(WORKSPACE, fn), api_gis='ogr')

            except Exception:
                __df = df.apply(lambda x: to_regular_str(x), axis=1)

                shp = sel_by_attr(sqdb, Q.format(
                    t, __df.whr.str.cat(sep=" OR ")
                ), os.path.join(WORKSPACE, fn))

            i += 1

    # Export OUT_TBL with tags not being used
    newTags.drop(
        ['key_y', 'value_y', 'geom_y', 'isnew', 'whr'],
        axis=1, inplace=True
    )
    obj_to_tbl(newTags, OUT_TBL, sheetsName="new_tags", sanitizeUtf8=True)

    return OUT_TBL
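# Usage sketch for get_not_used_tags (hedged): the OSM extract and the
# output table paths are hypothetical. Shapefiles with the unused tags are
# written next to OUT_TBL (its folder is used as workspace), and the table
# itself is exported via obj_to_tbl as a spreadsheet.
if __name__ == '__main__':
    get_not_used_tags(
        '/tmp/region.osm',          # OSM extract
        '/tmp/osm_new_tags.xlsx'    # table with tags not used by OSM2LULC
    )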
def closest_facility(incidents, incidents_id, facilities, output,
                     impedance='TravelTime'):
    """
    impedance options:
    * TravelTime;
    * WalkTime;
    """

    import requests
    import pandas as pd
    import numpy as np

    from glass.cons.esri import rest_token, CF_URL
    from glass.g.it.esri import json_to_gjson
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import df_to_shp
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.prj.obj import df_prj
    from glass.g.it.pd import df_to_geodf
    from glass.g.it.pd import json_obj_to_geodf
    from glass.cons.esri import get_tv_by_impedancetype

    # Get API token
    token = rest_token()

    # Data to Pandas DataFrames
    fdf = shp_to_obj(facilities)
    idf = shp_to_obj(incidents)

    # Re-project to WGS84
    fdf = df_prj(fdf, 4326)
    idf = df_prj(idf, 4326)

    # Geometries to Str - inputs for requests
    fdf['coords'] = fdf.geometry.x.astype(str) + ',' + fdf.geometry.y.astype(str)
    idf['coords'] = idf.geometry.x.astype(str) + ',' + idf.geometry.y.astype(str)

    # Delete geometry from incidents DF
    idf.drop(['geometry'], axis=1, inplace=True)

    # Split data
    # ArcGIS API only accepts 100 facilities
    # and 100 incidents in each request
    fdfs = df_split(fdf, 100, nrows=True) if fdf.shape[0] > 100 else [fdf]
    idfs = df_split(idf, 100, nrows=True) if idf.shape[0] > 100 else [idf]

    for i in range(len(idfs)):
        idfs[i].reset_index(inplace=True)
        idfs[i].drop(['index'], axis=1, inplace=True)

    for i in range(len(fdfs)):
        fdfs[i].reset_index(inplace=True)
        fdfs[i].drop(['index'], axis=1, inplace=True)

    # Get travel mode
    tv = get_tv_by_impedancetype(impedance)

    # Ask for results
    results = []

    drop_cols = [
        'ObjectID', 'FacilityID', 'FacilityRank', 'Name',
        'IncidentCurbApproach', 'FacilityCurbApproach', 'IncidentID',
        'StartTime', 'EndTime', 'StartTimeUTC', 'EndTimeUTC',
        'Total_Minutes', 'Total_TruckMinutes', 'Total_TruckTravelTime',
        'Total_Miles'
    ]

    if impedance == 'WalkTime':
        tv_col = 'walktime'
        rn_cols = {'Total_WalkTime' : tv_col}

        ndrop = ['Total_Kilometers', 'Total_TravelTime', 'Total_Minutes']

    elif impedance == 'metric':
        tv_col = 'kilomts'
        rn_cols = {'Total_Kilometers' : tv_col}

        ndrop = ['Total_WalkTime', 'Total_TravelTime', 'Total_Minutes']

    else:
        tv_col = 'traveltime'
        rn_cols = {'Total_TravelTime' : tv_col}

        ndrop = ['Total_Kilometers', 'Total_WalkTime', 'Total_Minutes']

    drop_cols.extend(ndrop)

    for i_df in idfs:
        incidents_str = i_df.coords.str.cat(sep=';')

        for f_df in fdfs:
            facilities_str = f_df.coords.str.cat(sep=';')

            # Make request
            r = requests.get(CF_URL, params={
                'facilities'                 : facilities_str,
                'incidents'                  : incidents_str,
                'token'                      : token,
                'f'                          : 'json',
                'travelModel'                : tv,
                'defaultTargetFacilityCount' : '1',
                'returnCFRoutes'             : True,
                'travelDirection'            : 'esriNATravelDirectionToFacility',
                'impedanceAttributeName'     : impedance
            })

            if r.status_code != 200:
                raise ValueError(
                    'Error when requesting from: {}'.format(str(r.url))
                )

            # Convert ESRI json to GeoJson
            esri_geom = r.json()
            geom = json_to_gjson(esri_geom.get('routes'))

            # GeoJSON to GeoDataFrame
            gdf = json_obj_to_geodf(geom, 4326)

            # Delete unwanted columns
            gdf.drop(drop_cols, axis=1, inplace=True)

            # Rename some interest columns
            gdf.rename(columns=rn_cols, inplace=True)

            # Add to results original attributes of incidents
            r_df = gdf.merge(i_df, how='left',
                             left_index=True, right_index=True)

            results.append(r_df)

    # Compute final result
    # Put every DataFrame in a single DataFrame
    fgdf = merge_df(results)

    # Since facilities were divided,
    # the same incident has several "nearest" facilities.
    # We just want one nearest facility,
    # so group by using the min operator
    gpdf = pd.DataFrame(fgdf.groupby([incidents_id]).agg({
        tv_col : 'min'
    })).reset_index()

    gpdf.rename(columns={incidents_id : 'iid', tv_col : 'impcol'},
                inplace=True)

    # Recover geometry
    fgdf = fgdf.merge(gpdf, how='left', left_on=incidents_id, right_on='iid')
    fgdf = fgdf[fgdf[tv_col] == fgdf.impcol]
    fgdf = df_to_geodf(fgdf, 'geometry', 4326)

    # Remove repeated units
    g = fgdf.groupby('iid')
    fgdf['rn'] = g[tv_col].rank(method='first')
    fgdf = fgdf[fgdf.rn == 1]

    fgdf.drop(['iid', 'rn'], axis=1, inplace=True)

    # Re-project to original SRS
    epsg = get_shp_epsg(facilities)
    fgdf = df_prj(fgdf, epsg)

    # Export result
    df_to_shp(fgdf, output)

    return output
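# Usage sketch for closest_facility (hedged): the shapefile paths and the
# incident id column ('iid') are hypothetical placeholders.
if __name__ == '__main__':
    closest_facility(
        '/tmp/incidents.shp',    # points needing the nearest facility
        'iid',                   # unique id column in the incidents layer
        '/tmp/facilities.shp',   # candidate facilities
        '/tmp/closest.shp',      # output routes (one per incident)
        impedance='TravelTime'
    )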
        '/home/jasp/mrgis/cos_union/cos18/shape7.shp',
        '/home/jasp/mrgis/cos_union/cos95/shape7.shp'
    ], [
        '/home/jasp/mrgis/cos_union/cos18/shape8.shp',
        '/home/jasp/mrgis/cos_union/cos95/shape8.shp'
    ],
]

outshp = '/home/jasp/mrgis/cos_union/result'
srs_epsg = 3763

cpu_n = cpu_cores() // 2

df_shp = pd.DataFrame(shp_pairs, columns=['shp_a', 'shp_b'])

dfs = df_split(df_shp, cpu_n)

thrds = [
    mp.Process(
        target=multi_run, name='th-{}'.format(str(i + 1)),
        args=(i + 1, dfs[i], outshp)
    ) for i in range(len(dfs))
]

for t in thrds:
    t.start()

for t in thrds:
    t.join()
def service_areas(facilities, breaks, output, impedance='TravelTime'):
    """
    Produce Service Areas Polygons
    """

    import requests

    from glass.cons.esri import rest_token, SA_URL
    from glass.g.rd.shp import shp_to_obj
    from glass.g.prj.obj import df_prj
    from glass.g.it.esri import json_to_gjson
    from glass.g.it.pd import json_obj_to_geodf
    from glass.g.wt.shp import df_to_shp
    from glass.cons.esri import get_tv_by_impedancetype
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg

    # Get Token
    token = rest_token()

    # Get data
    pntdf = shp_to_obj(facilities)

    pntdf = df_prj(pntdf, 4326)

    pntdf['coords'] = pntdf.geometry.x.astype(str) + ',' + \
        pntdf.geometry.y.astype(str)

    pntdf.drop(['geometry'], axis=1, inplace=True)

    dfs = df_split(pntdf, 100, nrows=True)

    # Make requests
    gdfs = []
    for df in dfs:
        facilities_str = df.coords.str.cat(sep=';')

        tv = get_tv_by_impedancetype(impedance)

        r = requests.get(SA_URL, params={
            'facilities'             : facilities_str,
            'token'                  : token,
            'f'                      : 'json',
            'travelModel'            : tv,
            'defaultBreaks'          : ','.join(breaks),
            'travelDirection'        : 'esriNATravelDirectionToFacility',
            #'travelDirection'       : 'esriNATravelDirectionFromFacility',
            'outputPolygons'         : 'esriNAOutputPolygonDetailed',
            'impedanceAttributeName' : impedance
        })

        if r.status_code != 200:
            raise ValueError(
                'Error when requesting from: {}'.format(str(r.url))
            )

        esri_geom = r.json()
        geom = json_to_gjson(esri_geom.get('saPolygons'))

        gdf = json_obj_to_geodf(geom, 4326)

        gdf = gdf.merge(df, how='left', left_index=True, right_index=True)

        gdfs.append(gdf)

    # Compute final result
    fgdf = merge_df(gdfs)

    epsg = get_shp_epsg(facilities)
    fgdf = df_prj(fgdf, epsg)

    df_to_shp(fgdf, output)

    return output
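# Usage sketch for service_areas (hedged): paths are hypothetical.
# Note that breaks is consumed with ','.join(breaks), so it must be a
# sequence of strings (e.g. minutes), not numbers.
if __name__ == '__main__':
    service_areas(
        '/tmp/facilities.shp',
        ['5', '10', '15'],         # breaks as strings
        '/tmp/service_areas.shp',
        impedance='WalkTime'
    )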
def thrd_viewshed_v2(dbname, dem, pnt_obs, obs_id):
    """
    Compute Viewshed for all points in pnt_obs using
    a multiprocessing approach
    """

    import os
    import pandas as pd
    import numpy as np
    from osgeo import gdal
    import multiprocessing as mp

    from glass.g.rd.shp import shp_to_obj
    from glass.pys.oss import cpu_cores, mkdir
    from glass.ng.pd.split import df_split
    from glass.g.wenv.grs import run_grass
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.wt.sql import df_to_db
    from glass.pys.oss import del_file
    from glass.ng.sql.db import create_db
    from glass.pys.num import get_minmax_fm_seq_values

    # Get Work EPSG
    epsg = get_shp_epsg(pnt_obs)

    # Points to DataFrame
    obs_df = shp_to_obj(pnt_obs)

    # Split DF by the number of cores
    n_cpu = cpu_cores()
    dfs = df_split(obs_df, n_cpu)

    def run_viewshed_by_cpu(tid, db, obs, dem, srs,
                            vis_basename='vis', maxdst=None,
                            obselevation=None):
        # Create Database
        new_db = create_db("{}_{}".format(db, str(tid)), api='psql')

        # Points to Database
        pnt_tbl = df_to_db(
            new_db, obs, 'pnt_tbl', api='psql',
            epsg=srs, geomType='Point', colGeom='geometry'
        )

        # Create GRASS GIS Session
        workspace = mkdir(os.path.join(
            os.path.dirname(dem), 'work_{}'.format(str(tid))
        ))
        loc_name = 'vis_loc'
        gbase = run_grass(workspace, location=loc_name, srs=dem)

        # Start GRASS GIS Session
        import grass.script as grass
        import grass.script.setup as gsetup

        gsetup.init(gbase, workspace, loc_name, 'PERMANENT')

        from glass.g.it.rst import rst_to_grs, grs_to_rst
        from glass.g.rst.surf import grs_viewshed
        from glass.g.deldt import del_rst

        # Send DEM to GRASS GIS
        grs_dem = rst_to_grs(dem, 'grs_dem', as_cmd=True)

        # Produce Viewshed for each point in obs
        for idx, row in obs.iterrows():
            # Get Viewshed raster
            vrst = grs_viewshed(
                grs_dem, (row.geometry.x, row.geometry.y),
                '{}_{}'.format(vis_basename, str(row[obs_id])),
                max_dist=maxdst, obs_elv=obselevation
            )

            # Export Raster to File
            frst = grs_to_rst(vrst, os.path.join(workspace, vrst + '.tif'))

            # Raster to Array
            img = gdal.Open(frst)
            num = img.ReadAsArray()

            # Two Dimensions to One Dimension
            # Reshape Array
            numone = num.reshape(num.shape[0] * num.shape[1])

            # Get Indexes with visibility
            visnum = np.arange(numone.shape[0]).astype(np.uint32)
            visnum = visnum[numone == 1]

            # Get Indexes intervals
            visint = get_minmax_fm_seq_values(visnum)

            # Get rows indexes
            _visint = visint.reshape(visint.shape[0] * visint.shape[1])
            visrow = _visint / num.shape[1]
            visrow = visrow.astype(np.uint32)

            # Get cols indexes
            viscol = _visint - (visrow * num.shape[1])

            # Reshape
            visrow = visrow.reshape(visint.shape)
            viscol = viscol.reshape(visint.shape)

            # Split array
            irow, erow = np.vsplit(visrow.T, 1)[0]
            icol, ecol = np.vsplit(viscol.T, 1)[0]

            # Visibility indexes to Pandas DataFrame
            idxnum = np.full(irow.shape, row[obs_id])

            visdf = pd.DataFrame({
                'pntid' : idxnum,
                'rowi'  : irow, 'rowe' : erow,
                'coli'  : icol, 'cole' : ecol
            })

            # Pandas DF to database
            # Create Visibility table
            df_to_db(
                new_db, visdf, vis_basename,
                api='psql', colGeom=None,
                append=None if not idx else True
            )

            # Delete all variables
            numone = None
            visnum = None
            visint = None
            _visint = None
            visrow = None
            viscol = None
            irow = None
            erow = None
            icol = None
            ecol = None
            idxnum = None
            visdf = None
            del img

            # Delete GRASS GIS File
            del_rst(vrst)

            # Delete TIFF File
            del_file(frst)
            frst = None

    thrds = [mp.Process(
        target=run_viewshed_by_cpu, name='th-{}'.format(str(i + 1)),
        args=(i + 1, dbname, dfs[i], dem, epsg, 'vistoburn', 10000, 500)
    ) for i in range(len(dfs))]

    for t in thrds:
        t.start()

    for t in thrds:
        t.join()

    return 1
def thrd_viewshed(dem, pnt_obs, obs_id, out_folder):
    """
    Compute Viewshed for all points in pnt_obs using
    a multiprocessing approach
    """

    import os
    import multiprocessing as mp

    from glass.g.rd.shp import shp_to_obj
    from glass.pys.oss import cpu_cores
    from glass.ng.pd.split import df_split
    from glass.g.wenv.grs import run_grass

    # Points to DataFrame
    obs_df = shp_to_obj(pnt_obs)

    # Split DF by the number of cores
    n_cpu = cpu_cores()
    dfs = df_split(obs_df, n_cpu)

    def run_viewshed_by_cpu(tid, obs, dem, output,
                            vis_basename='vis', maxdst=None,
                            obselevation=None):
        # Create GRASS GIS location
        loc_name = 'loc_' + str(tid)
        gbase = run_grass(output, location=loc_name, srs=dem)

        # Start GRASS GIS Session
        import grass.script as grass
        import grass.script.setup as gsetup

        gsetup.init(gbase, output, loc_name, 'PERMANENT')

        from glass.g.it.rst import rst_to_grs, grs_to_rst
        from glass.g.rst.surf import grs_viewshed

        # Send DEM to GRASS GIS
        grs_dem = rst_to_grs(dem, 'grs_dem', as_cmd=True)

        # Produce Viewshed for each point in obs
        for idx, row in obs.iterrows():
            vrst = grs_viewshed(
                grs_dem, (row.geometry.x, row.geometry.y),
                '{}_{}'.format(vis_basename, str(row[obs_id])),
                max_dist=maxdst, obs_elv=obselevation
            )

            frst = grs_to_rst(vrst, os.path.join(output, vrst + '.tif'))

    thrds = [mp.Process(
        target=run_viewshed_by_cpu, name='th-{}'.format(str(i + 1)),
        args=(i + 1, dfs[i], dem, out_folder, 'vistoburn', 10000, 200)
    ) for i in range(len(dfs))]

    for t in thrds:
        t.start()

    for t in thrds:
        t.join()

    return out_folder
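# Usage sketch for thrd_viewshed (hedged): paths and the observer id column
# are hypothetical. One viewshed GeoTIFF is written per observer point,
# named vistoburn_<obs_id>.tif, using the 10000 m maximum distance and
# 200 m observer elevation hard-coded in the args tuple above.
if __name__ == '__main__':
    thrd_viewshed(
        '/tmp/dem.tif',         # digital elevation model
        '/tmp/observers.shp',   # observer points
        'pntid',                # unique id column of the observer points
        '/tmp/viewsheds'        # output folder
    )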
def osmlulc_to_s2grid(ref_raster, osmtolulc, lucol, tmp_folder, results):
    """
    OSM LULC to Sentinel-2 GRID
    """

    import os
    import pandas as pd
    import multiprocessing as mp

    from glass.g.smp import nfishnet_fm_rst
    from glass.pys.oss import lst_ff, cpu_cores, mkdir, fprop
    from glass.ng.wt import obj_to_tbl
    from glass.ng.pd.split import df_split
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import df_to_shp
    from glass.g.gp.ext import shpext_to_boundshp

    # Create Fishnets
    fishnets = mkdir(os.path.join(tmp_folder, 'fishnets_shp'))
    fnet = nfishnet_fm_rst(ref_raster, 500, 500, fishnets)

    # List Fishnet
    df_fnet = pd.DataFrame(fnet, columns=['fishnet'])

    # List results
    lst_lulc = lst_ff(osmtolulc, file_format='.shp')

    # Produce boundaries for each fishnet
    bf = mkdir(os.path.join(tmp_folder, 'boundaries'))

    def produce_bound(row):
        row['bound'] = shpext_to_boundshp(
            row.fishnet, os.path.join(bf, os.path.basename(row.fishnet))
        )

        return row

    df_fnet = df_fnet.apply(lambda x: produce_bound(x), axis=1)

    df_fnet['idx'] = df_fnet.index

    # Get CPU Numbers
    n_cpu = cpu_cores()

    # Split data by CPU
    dfs = df_split(df_fnet, n_cpu)

    # thrd_lulc_by_cell is expected to be defined elsewhere in this module
    thrds = [
        mp.Process(
            target=thrd_lulc_by_cell, name='th_{}'.format(str(i + 1)),
            args=(i + 1, dfs[i], lst_lulc, tmp_folder)
        ) for i in range(len(dfs))
    ]

    for i in thrds:
        i.start()

    for i in thrds:
        i.join()

    # Re-list fishnets
    fish_files = df_fnet.fishnet.tolist()

    for fishp in fish_files:
        # List Intersection files for each fishnet
        int_files = lst_ff(
            os.path.join(tmp_folder, fprop(fishp, 'fn')),
            file_format='.shp'
        )

        if not len(int_files):
            continue

        # Open Fishnet
        fish_df = shp_to_obj(fishp)
        fish_df.rename(columns={'FID' : 'fid'}, inplace=True)
        fish_df['area'] = fish_df.geometry.area

        # Open other files
        for f in int_files:
            fn = fprop(f, 'fn')

            df = shp_to_obj(f)

            if fn != 'ovl_union':
                df = df[~df['b_' + lucol].isnull()]
                clsid = df['b_' + lucol].unique()[0]
            else:
                df = df[~df.b_refid.isnull()]
                clsid = None

            if fn == 'ovl_union':
                df['areav'] = df.geometry.area

            df = pd.DataFrame({
                'areav' : df.groupby(['a_FID'])['areav'].agg('sum')
            }).reset_index()

            fish_df = fish_df.merge(
                df, how='left', left_on='fid', right_on='a_FID'
            )

            if fn != 'ovl_union':
                fish_df['lu_' + str(clsid)] = fish_df.areav * 100 / fish_df.area
            else:
                fish_df['overlay'] = fish_df.areav * 100 / fish_df.area

            fish_df.drop(['areav', 'a_FID'], axis=1, inplace=True)

        # Save file
        df_to_shp(fish_df, os.path.join(results, os.path.basename(fishp)))

    # Write List of Fishnet
    obj_to_tbl(df_fnet, os.path.join(results, 'fishnet_list.xlsx'))

    return results
def search_by_keyword(db, out_tbl, qarea, wgrp=None):
    """
    Get data using keywords
    """

    import os
    import pandas as pd
    from multiprocessing import Process, Manager

    from glass.cons.dsn import search_words, tw_key
    from glass.ng.pd import merge_df
    from glass.ng.pd.split import df_split
    from glass.g.wt.sql import df_to_db

    # Get API Keys
    keys = tw_key()

    # Get search words
    words = search_words(group=wgrp)

    # Split search words by the available API keys
    words_dfs = [words] if len(keys) == 1 else df_split(words, len(keys))

    # Search for data
    with Manager() as manager:
        DFS = manager.list()
        LOG_LST = manager.list()

        DROP_COLS = ["retweeted"]

        # Create Threads
        # get_tweets is expected to be defined elsewhere in this module
        thrds = [Process(
            name='tk{}'.format(str(i)), target=get_tweets,
            args=(
                DFS, LOG_LST, words_dfs[i], qarea,
                keys[i], DROP_COLS, i
            )
        ) for i in range(len(words_dfs))]

        for t in thrds:
            t.start()

        for t in thrds:
            t.join()

        if not len(DFS):
            raise ValueError('NoData was collected!')

        # Merge all dataframes
        if len(DFS) == 1:
            all_df = DFS[0]
        else:
            all_df = merge_df(DFS, ignIndex=True, ignoredfstype=True)

        all_df.rename(columns={"user" : "username"}, inplace=True)

        # Sanitize time reference
        all_df['daytime'] = pd.to_datetime(all_df.tweet_time)
        all_df.daytime = all_df.daytime.astype(str)
        all_df.daytime = all_df.daytime.str.slice(start=0, stop=-6)
        all_df.drop('tweet_time', axis=1, inplace=True)

        # Rename cols
        all_df.rename(columns={
            'text'            : 'txt',
            'tweet_lang'      : 'tlang',
            'user_id'         : 'userid',
            'user_location'   : 'userloc',
            'place_country'   : 'placecountry',
            'place_countryc'  : 'placecountryc',
            'place_name'      : 'placename',
            'place_box'       : 'placebox',
            'place_id'        : 'placeid',
            'followers_count' : 'followersn'
        }, inplace=True)

        # Data to new table
        df_to_db(db, all_df, out_tbl, append=True, api='psql')

        # Write log file
        log_txt = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            '{}-log.txt'.format(out_tbl)
        )

        with open(log_txt, 'w') as f:
            f.write("\n".join(LOG_LST))

    return log_txt
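# Usage sketch for search_by_keyword (hedged): the database name, table name
# and search-area string are hypothetical; qarea is forwarded as-is to
# get_tweets, so its exact format depends on that helper.
if __name__ == '__main__':
    log = search_by_keyword(
        'twitter_db',          # PostgreSQL database (written via df_to_db)
        'tweets_keywords',     # output table
        '40.2,-8.4,50km',      # search area passed to get_tweets
        wgrp=None              # optional group of search words
    )
    print(log)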