def df_cols_to_rows_smp(df, idcol, colstoval, colstovalname, valcol):
    """
    Unpivot the columns in colstoval into rows.

    Dataframe Like:
        | date     | station | RTD    | EC        | vstation | id
      0 | 00:00:14 | None    | 16.259 | 31029.021 | 34       | 1
      1 | 00:00:39 | None    | 21.555 | 32139.144 | 34       | 2
      2 | 00:00:56 | None    | 23.574 | 17893.594 | 34       | 3

    To:
        | id | param | value
      0 | 1  | RTD   | 16.259
      1 | 2  | RTD   | 21.555
      2 | 3  | RTD   | 23.574
      3 | 1  | EC    | 31029.021
      4 | 2  | EC    | 32139.144
      5 | 3  | EC    | 17893.594

    idcol         - column identifying each original row (kept on every row)
    colstoval     - column name or list of column names to melt into rows
    colstovalname - name of the new column receiving the old column names
    valcol        - name of the new column receiving the values

    Returns a new DataFrame with columns [idcol, colstovalname, valcol].
    NOTE: this matches the documented example; the previous implementation
    returned the columns as [valcol, idcol, colstovalname], contradicting
    its own docstring.
    """

    # Accept a single column name as well as a list (same behaviour the
    # glass obj_to_lst helper provided)
    colstoval = [colstoval] if isinstance(colstoval, str) else colstoval

    # pandas.DataFrame.melt performs exactly this unpivot: one output row
    # per (input row, melted column), column-major order — identical row
    # order to the old per-column copy + concat loop, without the deep
    # copies or the project merge_df helper
    return df.melt(
        id_vars=[idcol], value_vars=colstoval,
        var_name=colstovalname, value_name=valcol
    )
def dfcolstorows(inDf, colField, valField, colFid=None):
    """
    Unpivot a DataFrame: every column (optionally except colFid) becomes
    (column-name, value) rows.

    Dataframe Like:
        | pop_res | ind2    | ind3 | id_unit
      0 | 571     | 35.0975 | 123  | 3768
      1 | 938     | 18.2114 | 265  | 3618

    To (without colFid):
        | colField | valField
      0 | pop_res  | 571
      1 | pop_res  | 938
      2 | ind2     | 35.0975
      ...

    When colFid names an existing column, that column is kept unchanged
    on every output row instead of being melted.
    """

    from glass.ng.pd import merge_df

    melt_cols = list(inDf.columns.values)

    # colFid is an identifier column - never melt it
    if colFid and colFid in melt_cols:
        melt_cols.remove(colFid)

    pieces = []
    for c in melt_cols:
        # One slice per melted column: keep only c (plus colFid, if any),
        # record the source column name, and rename c to the value field
        piece = inDf.copy()
        piece.drop([o for o in melt_cols if o != c], axis=1, inplace=True)
        piece[colField] = c
        piece.rename(columns={c: valField}, inplace=True)
        pieces.append(piece)

    return merge_df(pieces)
def merge_tbls(folder, out_tbl, tbl_format='.dbf'):
    """
    Concatenate every table found in a folder into one single output table.

    folder     - directory holding the input tables
    out_tbl    - path of the merged output table
    tbl_format - extension of the tables to pick up (default '.dbf')

    Returns out_tbl.
    """

    from glass.pys.oss import lst_ff
    from glass.ng.rd import tbl_to_obj
    from glass.ng.wt import obj_to_tbl
    from glass.ng.pd import merge_df

    # Read every matching table in the folder into a DataFrame
    dfs = [tbl_to_obj(t) for t in lst_ff(folder, file_format=tbl_format)]

    # Concatenate and write the result
    obj_to_tbl(merge_df(dfs), out_tbl)

    return out_tbl
def same_attr_to_shp(inShps, interestCol, outFolder, basename="data_",
                     resultDict=None):
    """
    For several SHPS with the same field, this program will list
    all values in such field and will create a new shp for all
    values with the respective geometry regardeless the origin shp.

    Returns a list with the new shapefile paths, or - when resultDict is
    truthy - a dict mapping each value (as string, decimal part stripped)
    to its shapefile path.
    """

    import os
    from glass.g.rd.shp import shp_to_obj
    from glass.ng.pd import merge_df
    from glass.g.wt.shp import df_to_shp

    # Outputs use the same extension as the first input
    ext = os.path.splitext(inShps[0])[1]

    # Stack all inputs into one DataFrame
    alldata = merge_df([shp_to_obj(s) for s in inShps], ignIndex=True)

    result = {} if resultDict else []

    for val in alldata[interestCol].unique():
        subset = alldata[alldata[interestCol] == val]

        # Use the value as file key; drop any decimal part
        key = str(val).split('.')[0] if '.' in str(val) else str(val)

        newshp = df_to_shp(subset, os.path.join(
            outFolder, '{}{}{}'.format(basename, key, ext)
        ))

        if resultDict:
            result[key] = newshp
        else:
            result.append(newshp)

    return result
def lst_prod_by_cell_and_year(shp, id_col, year, outshp,
                              platform="Sentinel-2", processingl='Level-2A',
                              epsg=32629):
    """
    Get a list of images:
    * one for each grid in shp;
    * one for each month in one year - the choosen image will be the one
    with lesser area occupied by clouds;

    total_images = grid_number * number_months_year

    shp         - polygon grid; one cell per satellite tile
    id_col      - column in shp with the tile/cell identifier
    year        - year to search (int)
    outshp      - path of the output shapefile with the selected products
    platform    - Copernicus platform name (default "Sentinel-2")
    processingl - requested processing level
    epsg        - metric SRS code used to measure footprint areas

    Returns outshp.
    """

    from glass.g.rd.shp import shp_to_obj
    from glass.ng.pd import merge_df
    from glass.g.wt.shp import df_to_shp
    from glass.g.it.pd import df_to_geodf

    # Last day of each month (NOTE(review): leap years are not handled,
    # February is always 28 - confirm this is acceptable)
    months = {
        '01': '31', '02': '28', '03': '31', '04': '30', '05': '31',
        '06': '30', '07': '31', '08': '31', '09': '30', '10': '31',
        '11': '30', '12': '31'
    }

    # Open SHP grid; the catalogue search expects WGS84 geometries
    grid = shp_to_obj(shp, srs_to=4326)

    def get_grid_id(row):
        # Extract the tile id from the product title: the 6th '_' token,
        # minus its leading 'T' (e.g. ..._T29TNE_... -> 29TNE)
        row['cellid'] = row.title.split('_')[5][1:]
        return row

    # Search for images: one query per (grid cell, month)
    dfs = []
    for idx, cell in grid.iterrows():
        for k in months:
            # Sensing period covering the whole month k
            start = "{}{}01".format(str(year), k)
            end = "{}{}{}".format(str(year), k, months[k])

            # Jan/Feb 2018 Level-2A products were published under the
            # pilot 'Level-2Ap' processing level
            if year == 2018 and processingl == 'Level-2A':
                if k == '01' or k == '02':
                    plevel = 'Level-2Ap'
                else:
                    plevel = processingl
            else:
                plevel = processingl

            # NOTE(review): lst_prod is not imported in this function; it
            # must be defined/imported elsewhere in this module - confirm
            prod = lst_prod(cell.geometry.wkt, start, end,
                            platname=platform, procLevel=plevel)

            if not prod.shape[0]:
                continue

            # Get area of each footprint in square kilometers
            prod = prod.to_crs('EPSG:{}'.format(str(epsg)))
            prod['areav'] = prod.geometry.area / 1000000

            # We want only images with more than 70% of data
            # NOTE(review): the 7000 km2 threshold may not correspond to
            # exactly 70% of a tile footprint - confirm the intent
            prod = prod[prod.areav >= 7000]

            # ID Cell ID (adds a 'cellid' column from the product title)
            prod = prod.apply(lambda x: get_grid_id(x), axis=1)

            # Keep only products whose tile matches this grid cell
            prod = prod[prod.cellid == cell[id_col]]

            # Sort by cloud cover and date (clearest and earliest first)
            prod = prod.sort_values(
                ['cloudcoverpercentage', 'ingestiondate'],
                ascending=[True, True]
            )

            # Get only the image with less cloud cover
            prod = prod.head(1)

            dfs.append(prod)

    # Concatenate the per-cell/month picks, rebuild geometry and export
    fdf = merge_df(dfs)
    fdf = df_to_geodf(fdf, 'geometry', epsg)

    df_to_shp(fdf, outshp)

    return outshp
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell

    For every point of GRID_PNT, query DATA_SOURCE (currently only
    'facebook') within RADIUS, store all rows in a fresh PostgreSQL
    database db, group identical places, and export the grouped table
    to OUTPUT_TABLE. Returns OUTPUT_TABLE.
    """

    import time
    from glass.g.rd.shp import shp_to_obj
    from glass.ng.sql.db import create_db
    from glass.g.acq.dsn.fb.places import places_by_query
    from glass.g.prj.obj import df_prj
    from glass.ng.pd import merge_df
    from glass.g.it.shp import dbtbl_to_shp
    from glass.ng.sql.q import q_to_ntbl
    from glass.g.wt.sql import df_to_db

    # Open GRID SHP and make sure the points are in WGS84
    pnt = shp_to_obj(GRID_PNT)
    if EPSG != 4326:
        pnt = df_prj(pnt, 4326)

    pnt["lng"] = pnt.geometry.x.astype(float)
    pnt["lat"] = pnt.geometry.y.astype(float)
    pnt["grid_id"] = pnt.index

    # Collected per-cell responses
    found = []

    def fetch_cell(row, datasrc):
        # One request per grid point
        if datasrc == 'facebook':
            d = places_by_query(
                {'x': row.lng, 'y': row.lat, 'r': RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )
        else:
            raise ValueError(
                '{} as datasource is not a valid value'.format(datasrc))

        # An int response means the request failed - skip this cell
        if type(d) == int:
            return

        # Tag the rows with their origin cell and throttle the next call
        d['grid_id'] = row.grid_id
        found.append(d)

        time.sleep(5)

    pnt.apply(lambda x: fetch_cell(x, DATA_SOURCE), axis=1)

    alldata = merge_df(found)

    # Fresh database for the results
    create_db(db, overwrite=True, api='psql')

    # Send Data to PostgreSQL
    df_to_db(
        db, alldata, "{}_data".format(DATA_SOURCE), EPSG, "POINT",
        colGeom='geometry' if 'geometry' in alldata.columns.values else 'geom'
    )

    # Group identical places, aggregating the ids of the cells that
    # returned them
    cols = [
        c for c in alldata.columns.values
        if c != "geometry" and c != 'geom' and c != "grid_id"
    ] + ["geom"]

    grpby_tbl = q_to_ntbl(
        db, "{}_grpby".format(DATA_SOURCE),
        ("SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
         "FROM {dtsrc}_data GROUP BY {cols}").format(
             cols=", ".join(cols), dtsrc=DATA_SOURCE),
        api='psql'
    )

    dbtbl_to_shp(db, grpby_tbl, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG)

    return OUTPUT_TABLE
def otp_cf_based_on_rel(incidents, group_incidents_col, facilities,
                        facilities_id, rel_inc_fac, sheet, group_fk,
                        facilities_fk, hour, day, output):
    """
    Calculate time travel considering specific facilities
    for each group of incidents

    Relations between incidents and facilities
    are in a auxiliar table (rel_inc_fac).
    Auxiliar table must be a xlsx file

    incidents           - point shapefile with the incidents
    group_incidents_col - column of incidents defining the group
    facilities          - point shapefile with the facilities
    facilities_id       - id column of facilities
    rel_inc_fac         - xlsx file relating groups to facilities
    sheet               - sheet of rel_inc_fac to read
    group_fk            - column of rel_inc_fac with the group id
    facilities_fk       - column of rel_inc_fac referencing facilities_id
    hour, day           - departure time/date passed to the OTP helper
    output              - path of the result shapefile

    Returns output; per-incident errors are written to '<output>_log.txt'.
    """

    import os
    import pandas as pd
    from glass.ng.rd import tbl_to_obj
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import obj_to_shp
    from glass.g.mob.otp.log import clsfacility
    from glass.g.prop.prj import get_shp_epsg
    from glass.ng.pd import merge_df
    from glass.pys.oss import fprop
    from glass.g.prj.obj import df_prj

    # Avoid problems when facilities_id == facilities_fk:
    # the merge below would otherwise collide the two column names
    facilities_fk = facilities_fk + '_fk' if facilities_id == facilities_fk else \
        facilities_fk

    # Open data; OTP needs WGS84 coordinates
    idf = df_prj(shp_to_obj(incidents), 4326)
    fdf = df_prj(shp_to_obj(facilities), 4326)

    rel_df = tbl_to_obj(rel_inc_fac, sheet=sheet)

    # Output keeps the incidents' original SRS
    oepsg = get_shp_epsg(incidents)

    # Relate facilities with incidents groups (inner join: facilities
    # without a relation row are discarded)
    fdf = fdf.merge(
        rel_df, how='inner', left_on=facilities_id, right_on=facilities_fk
    )

    # List Groups (distinct values of group_incidents_col; the count
    # column 'cnttemp' is only a by-product of the groupby)
    grp_df = pd.DataFrame({
        'cnttemp': idf.groupby([group_incidents_col])[group_incidents_col].agg('count')
    }).reset_index()

    # Do calculations: closest facility per group, restricted to the
    # facilities related to that group
    res = []
    logs = []
    for idx, row in grp_df.iterrows():
        # Get incidents for that group
        new_i = idf[idf[group_incidents_col] == row[group_incidents_col]]

        # Get facilities for that group
        new_f = fdf[fdf[group_fk] == row[group_incidents_col]]

        # calculate closest facility
        cfres, l = clsfacility(new_i, new_f, hour, day, out_epsg=oepsg)

        res.append(cfres)
        logs.extend(l)

    # Merge results
    out_df = merge_df(res)

    # Recovery facility id: keep only facilities_id in fdf, then join the
    # closest-facility index ('ffid') back against fdf's row index
    # NOTE(review): this assumes clsfacility's 'ffid' refers to fdf's row
    # labels after the relation merge - confirm
    fdf.drop(
        [c for c in fdf.columns.values if c != facilities_id],
        axis=1, inplace=True
    )

    out_df = out_df.merge(fdf, how='left', left_on='ffid', right_index=True)

    # Export result
    obj_to_shp(out_df, "geom", oepsg, output)

    # Write logs (one entry per incident/facility pair that failed)
    if len(logs) > 0:
        with open(os.path.join(
            os.path.dirname(output), fprop(output, 'fn') + '_log.txt'
        ), 'w') as txt:
            for i in logs:
                txt.write((
                    "Incident_id: {}\n"
                    "Facility_id: {}\n"
                    "ERROR message:\n"
                    "{}\n"
                    "\n\n\n\n\n\n"
                ).format(str(i[0]), str(i[1]), str(i[2])))

    return output
def otp_servarea(facilities, hourday, date, breaks, output, vel=None):
    """
    OTP Service Area

    Request one isochrone set per facility from an OpenTripPlanner
    instance (ISO_URL) and write every polygon to a single shapefile.

    facilities - point shapefile with the origin locations
    hourday    - departure time sent to OTP
    date       - departure date sent to OTP
    breaks     - cutoff value(s) in seconds (single value or list)
    output     - path of the result shapefile
    vel        - walk speed (default 3)

    Returns output; request failures are logged to '<output>.log.txt'.
    Raises ValueError if no facility produced a valid response.
    """

    import requests
    import os

    from glass.cons.otp import ISO_URL
    from glass.g.rd.shp import shp_to_obj
    from glass.g.prj.obj import df_prj
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.wt.shp import obj_to_shp
    from glass.pys.oss import fprop
    from glass.g.it.pd import json_obj_to_geodf
    from glass.ng.pd import merge_df
    from glass.pys import obj_to_lst

    breaks = obj_to_lst(breaks)

    # Open Data; OTP expects WGS84 "lat,lng" origins
    facilities_df = df_prj(shp_to_obj(facilities), 4326)

    # Place request parameters
    get_params = [
        ('mode', 'WALK,TRANSIT'),
        ('date', date),
        ('time', hourday),
        ('maxWalkDistance', 50000),
        ('walkSpeed', 3 if not vel else vel)
    ]

    breaks.sort()
    for b in breaks:
        get_params.append(('cutoffSec', b))

    # Placeholder for the per-facility origin - always the LAST parameter.
    # FIX: the previous code appended 'fromPlace' only when the first row's
    # index label was falsy ("if not i"); with a non-default index the
    # first iteration overwrote the last 'cutoffSec' entry instead.
    get_params.append(('fromPlace', None))

    # Do the math
    error_logs = []
    results = []

    for i, r in facilities_df.iterrows():
        get_params[-1] = (
            'fromPlace', str(r.geometry.y) + ',' + str(r.geometry.x)
        )

        resp = requests.get(
            ISO_URL, get_params, headers={'accept': 'application/json'}
        )

        try:
            data = resp.json()
        except ValueError:
            # Narrowed from a bare except: Response.json raises ValueError
            # (or a subclass) when the body is not valid JSON
            error_logs.append([i, 'Cannot retrieve JSON Response'])
            continue

        gdf = json_obj_to_geodf(data, 4326)
        gdf['ffid'] = i

        results.append(gdf)

    # Fail fast with a clear message instead of letting merge_df choke on
    # an empty list
    if not results:
        raise ValueError('No valid response was obtained for any facility')

    # Merge all Isochrones
    df_res = merge_df(results)

    # Back to the facilities' original SRS
    out_epsg = get_shp_epsg(facilities)

    if out_epsg != 4326:
        df_res = df_prj(df_res, out_epsg)

    obj_to_shp(df_res, "geometry", out_epsg, output)

    # Write logs
    if len(error_logs):
        with open(os.path.join(
            os.path.dirname(output), fprop(output, 'fn') + '.log.txt'
        ), 'w') as txt:
            for i in error_logs:
                txt.write((
                    "Facility_id: {}\n"
                    "ERROR message:\n"
                    "{}\n"
                    "\n\n\n\n\n\n"
                ).format(str(i[0]), i[1]))

    return output
def df_cols_to_rows(inDf, TO_COLS, col_old_col_name, key_old_col_name,
                    col_mantain):
    """
    Unpivot several parallel column sets into rows.

    Dataframe like:
        | pop_res | ind2    | id_unit | pop_res_int | ind2_int
      0 | 571     | 35.0975 | 3768    | 2           | 6
      1 | 938     | 18.2114 | 3618    | 3           | 1

    To:
        | id_unit | id_indicator | value   | cls
      0 | 3768    | pop_res      | 571     | 2
      1 | 3618    | pop_res      | 938     | 3
      2 | 3768    | ind2         | 35.0975 | 6
      3 | 3618    | ind2         | 18.2114 | 1

    Using as parameters:
    data_cols = ['pop_res', 'ind2']
    col_mantain = 'id_unit'
    TO_COLS = {
        # Dict values should have the same length
        'value' : data_cols,
        'cls'   : [i + '_int' for i in data_cols]
    }
    col_old_col_name = 'id_indicator'
    key_old_col_name = 'value'

    key_old_col_name selects which TO_COLS list provides the names stored
    in col_old_col_name.
    """

    from glass.pys import obj_to_lst
    from glass.ng.pd import merge_df

    keep_cols = obj_to_lst(col_mantain)
    first_key = list(TO_COLS.keys())[0]

    pieces = []
    for i in range(len(TO_COLS[first_key])):
        piece = inDf.copy(deep=True)

        # Columns surviving this pass: the identifiers plus the i-th
        # member of every TO_COLS list
        wanted = keep_cols.copy() + [TO_COLS[k][i] for k in TO_COLS]

        piece.drop(
            [c for c in piece.columns.values if c not in wanted],
            axis=1, inplace=True
        )

        # Old column names become the new generic column names ...
        piece.rename(
            columns={TO_COLS[k][i]: k for k in TO_COLS}, inplace=True
        )

        # ... and the original name (taken from the key_old_col_name list)
        # is recorded in col_old_col_name
        piece[col_old_col_name] = TO_COLS[key_old_col_name][i]

        pieces.append(piece)

    return merge_df(pieces)
def closest_facility(incidents, incidents_id, facilities, output,
                     impedance='TravelTime'):
    """
    Closest-facility analysis through the ArcGIS REST API: for every
    incident, find the nearest facility and export the routes (with the
    incidents' original attributes) to output.

    impedance options:
    * TravelTime;
    * WalkTime;
    * metric (kilometers; any other value falls back to TravelTime);
    """

    import requests
    import pandas as pd
    import numpy as np
    from glass.cons.esri import rest_token, CF_URL
    from glass.g.it.esri import json_to_gjson
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import df_to_shp
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.prj.obj import df_prj
    from glass.g.it.pd import df_to_geodf
    from glass.g.it.pd import json_obj_to_geodf
    from glass.cons.esri import get_tv_by_impedancetype

    # Get API token
    token = rest_token()

    # Data to Pandas DataFrames
    fdf = shp_to_obj(facilities)
    idf = shp_to_obj(incidents)

    # Re-project to WGS84 (coordinates travel as "lng,lat" text)
    fdf = df_prj(fdf, 4326)
    idf = df_prj(idf, 4326)

    # Geomtries to Str - inputs for requests
    fdf['coords'] = fdf.geometry.x.astype(str) + ',' + fdf.geometry.y.astype(
        str)
    idf['coords'] = idf.geometry.x.astype(str) + ',' + idf.geometry.y.astype(
        str)

    # Delete geometry from incidents DF (the returned routes carry the
    # geometry used from here on)
    idf.drop(['geometry'], axis=1, inplace=True)

    # Split data
    # ArcGIS API only accepts 100 facilities
    # and 100 incidents in each request
    fdfs = df_split(fdf, 100, nrows=True) if fdf.shape[0] > 100 else [fdf]
    idfs = df_split(idf, 100, nrows=True) if idf.shape[0] > 100 else [idf]

    # Re-number every chunk from zero: route i of a response is joined
    # back positionally to incident row i of its chunk
    for i in range(len(idfs)):
        idfs[i].reset_index(inplace=True)
        idfs[i].drop(['index'], axis=1, inplace=True)

    for i in range(len(fdfs)):
        fdfs[i].reset_index(inplace=True)
        fdfs[i].drop(['index'], axis=1, inplace=True)

    # Get travel mode
    tv = get_tv_by_impedancetype(impedance)

    # Ask for results
    results = []

    # Response columns that are never kept
    drop_cols = [
        'ObjectID', 'FacilityID', 'FacilityRank', 'Name',
        'IncidentCurbApproach', 'FacilityCurbApproach', 'IncidentID',
        'StartTime', 'EndTime', 'StartTimeUTC', 'EndTimeUTC',
        'Total_Minutes', 'Total_TruckMinutes', 'Total_TruckTravelTime',
        'Total_Miles'
    ]

    # Pick the output impedance column and the per-impedance rename/drop
    # lists. NOTE(review): 'Total_Minutes' appears both in drop_cols and
    # in every ndrop list - pandas drops it once, but confirm the intent.
    if impedance == 'WalkTime':
        tv_col = 'walktime'
        rn_cols = {'Total_WalkTime': tv_col}

        ndrop = ['Total_Kilometers', 'Total_TravelTime', 'Total_Minutes']
    elif impedance == 'metric':
        tv_col = 'kilomts'
        rn_cols = {'Total_Kilometers': tv_col}

        ndrop = ['Total_WalkTime', 'Total_TravelTime', 'Total_Minutes']
    else:
        tv_col = 'traveltime'
        rn_cols = {'Total_TravelTime': tv_col}

        ndrop = ['Total_Kilometers', 'Total_WalkTime', 'Total_Minutes']

    drop_cols.extend(ndrop)

    # One request per (incident chunk, facility chunk) pair
    for i_df in idfs:
        incidents_str = i_df.coords.str.cat(sep=';')

        for f_df in fdfs:
            facilities_str = f_df.coords.str.cat(sep=';')

            # Make request
            r = requests.get(CF_URL, params={
                'facilities': facilities_str,
                'incidents': incidents_str,
                'token': token,
                'f': 'json',
                'travelModel': tv,
                'defaultTargetFacilityCount': '1',
                'returnCFRoutes': True,
                'travelDirection': 'esriNATravelDirectionToFacility',
                'impedanceAttributeName': impedance
            })

            if r.status_code != 200:
                raise ValueError('Error when requesting from: {}'.format(
                    str(r.url)))

            # Convert ESRI json to GeoJson
            esri_geom = r.json()
            geom = json_to_gjson(esri_geom.get('routes'))

            # GeoJSON to GeoDataFrame
            gdf = json_obj_to_geodf(geom, 4326)

            # Delete unwanted columns
            gdf.drop(drop_cols, axis=1, inplace=True)

            # Rename some interest columns
            gdf.rename(columns=rn_cols, inplace=True)

            # Add to results original attributes of incidents
            # (positional join: route row i <-> incident row i)
            r_df = gdf.merge(i_df, how='left', left_index=True,
                             right_index=True)

            results.append(r_df)

    # Compute final result
    # Put every DataFrame in a single DataFrame
    fgdf = merge_df(results)

    # Since facilities were divided
    # The same incident has several "nearest" facilities
    # We just want one nearest facility
    # Lets group by using min operator
    gpdf = pd.DataFrame(fgdf.groupby([incidents_id]).agg({
        tv_col: 'min'
    })).reset_index()

    gpdf.rename(columns={incidents_id: 'iid', tv_col: 'impcol'},
                inplace=True)

    # Recovery geometry: keep only the rows matching each incident's
    # minimum impedance
    fgdf = fgdf.merge(gpdf, how='left', left_on=incidents_id,
                      right_on='iid')
    fgdf = fgdf[fgdf[tv_col] == fgdf.impcol]
    fgdf = df_to_geodf(fgdf, 'geometry', 4326)

    # Remove repeated units: on ties of the minimum impedance, keep only
    # the first-ranked row per incident
    g = fgdf.groupby('iid')
    fgdf['rn'] = g[tv_col].rank(method='first')
    fgdf = fgdf[fgdf.rn == 1]

    fgdf.drop(['iid', 'rn'], axis=1, inplace=True)

    # Re-project to original SRS
    epsg = get_shp_epsg(facilities)
    fgdf = df_prj(fgdf, epsg)

    # Export result
    df_to_shp(fgdf, output)

    return output
def service_areas(facilities, breaks, output, impedance='TravelTime'):
    """
    Produce Service Areas Polygons

    Ask the ArcGIS REST API for service-area polygons around every point
    in facilities, one request per chunk of 100 points, and write all
    polygons (joined with the original point attributes) to output.

    breaks - sequence of break values; NOTE(review): joined with ','. so
    its items are presumably strings - confirm against callers.
    """

    import requests
    from glass.cons.esri import rest_token, SA_URL
    from glass.g.rd.shp import shp_to_obj
    from glass.g.prj.obj import df_prj
    from glass.g.it.esri import json_to_gjson
    from glass.g.it.pd import json_obj_to_geodf
    from glass.g.wt.shp import df_to_shp
    from glass.cons.esri import get_tv_by_impedancetype
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg

    # Get Token
    token = rest_token()

    # Facilities as "lng,lat" strings in WGS84
    pnt = shp_to_obj(facilities)
    pnt = df_prj(pnt, 4326)

    pnt['coords'] = pnt.geometry.x.astype(str) + ',' + \
        pnt.geometry.y.astype(str)
    pnt.drop(['geometry'], axis=1, inplace=True)

    # The API accepts at most 100 facilities per request
    chunks = df_split(pnt, 100, nrows=True)

    # Make requests
    polygons = []
    for cdf in chunks:
        facilities_str = cdf.coords.str.cat(sep=';')

        tv = get_tv_by_impedancetype(impedance)

        r = requests.get(SA_URL, params={
            'facilities': facilities_str,
            'token': token,
            'f': 'json',
            'travelModel': tv,
            'defaultBreaks': ','.join(breaks),
            # Direction is TO the facility; switch to
            # 'esriNATravelDirectionFromFacility' for the opposite
            'travelDirection': 'esriNATravelDirectionToFacility',
            'outputPolygons': 'esriNAOutputPolygonDetailed',
            'impedanceAttributeName': impedance
        })

        if r.status_code != 200:
            raise ValueError(
                'Error when requesting from: {}'.format(str(r.url)))

        payload = r.json()
        gjson = json_to_gjson(payload.get('saPolygons'))

        gdf = json_obj_to_geodf(gjson, 4326)

        # Recover the original point attributes (positional join)
        gdf = gdf.merge(cdf, how='left', left_index=True, right_index=True)

        polygons.append(gdf)

    # Compute final result in the facilities' original SRS
    final = merge_df(polygons)

    srs = get_shp_epsg(facilities)
    final = df_prj(final, srs)

    df_to_shp(final, output)

    return output
def search_by_keyword(db, out_tbl, qarea, wgrp=None):
    """
    Get data using keywords

    Spawns one worker process per available API key, each collecting data
    for a slice of the configured search words inside qarea, appends the
    merged result to table out_tbl of database db, and writes collection
    logs next to this module. Returns the log file path.
    """

    import os
    import pandas as pd
    from multiprocessing import Process, Manager

    from glass.cons.dsn import search_words, tw_key
    from glass.ng.pd import merge_df
    from glass.ng.pd.split import df_split
    from glass.g.wt.sql import df_to_db

    # Get API Keys (one worker per key)
    keys = tw_key()

    # Get search words
    words = search_words(group=wgrp)

    # Split search words between the available keys
    # NOTE(review): this rebinding shadows the imported search_words
    # function for the rest of this scope
    search_words = [words] if len(keys) == 1 else df_split(words, len(keys))

    # Search for data
    with Manager() as manager:
        # Shared proxies filled in by the workers; only valid while the
        # Manager is alive, hence all uses stay inside this block
        DFS = manager.list()
        LOG_LST = manager.list()
        DROP_COLS = ["retweeted"]

        # Create worker processes (original comment said "Threads")
        # NOTE(review): get_tweets is not defined in this function - it
        # must exist elsewhere in this module; confirm
        thrds = [Process(
            name='tk{}'.format(str(i)), target=get_tweets,
            args=(DFS, LOG_LST, search_words[i], qarea, keys[i],
                  DROP_COLS, i)
        ) for i in range(len(search_words))]

        for t in thrds:
            t.start()

        for t in thrds:
            t.join()

        if not len(DFS):
            raise ValueError('NoData was collected!')

        # Merge all dataframes
        if len(DFS) == 1:
            all_df = DFS[0]
        else:
            all_df = merge_df(DFS, ignIndex=True, ignoredfstype=True)

        all_df.rename(columns={"user" : "username"}, inplace=True)

        # Sanitize time reference: string timestamp without the trailing
        # UTC-offset suffix (last 6 characters)
        all_df['daytime'] = pd.to_datetime(all_df.tweet_time)
        all_df.daytime = all_df.daytime.astype(str)
        all_df.daytime = all_df.daytime.str.slice(start=0, stop=-6)
        all_df.drop('tweet_time', axis=1, inplace=True)

        # Rename cols
        all_df.rename(columns={
            'text' : 'txt', 'tweet_lang' : 'tlang', 'user_id' : 'userid',
            'user_location' : 'userloc', 'place_country' : 'placecountry',
            'place_countryc' : 'placecountryc', 'place_name' : 'placename',
            'place_box' : 'placebox', 'place_id' : 'placeid',
            'followers_count' : 'followersn'
        }, inplace=True)

        # Data to new table (appended, not replaced)
        df_to_db(db, all_df, out_tbl, append=True, api='psql')

        # Write log file next to this module
        log_txt = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            '{}-log.txt'.format(out_tbl)
        )

        with open(log_txt, 'w') as f:
            f.write("\n".join(LOG_LST))

    return log_txt