def get_cluster_rollup(col):
    """Summarize one clustering-results column and insert a one-row rollup.

    Joins the cluster assignments in ``{results_table}.{col}`` against the
    original positions in ``{source_table}``, finds each cluster's centroid
    and nearest site, and inserts the aggregated stats as a single row into
    ``{rollup_table}``.

    Parameters
    ----------
    col : str
        Name of the cluster-assignment column in ``{results_table}`` to
        roll up.  NOTE: interpolated directly into SQL as an identifier —
        must come from trusted code, never user input.

    Uses the module-level names ``source_table``, ``results_table``,
    ``rollup_table``, ``gsta`` and ``db_config``.
    """
    print(f'Starting {col}...')
    sql_analyze = f"""
    --return clust_id, total points, avg_geom, nearest_site_id, site name and site geom
    with final as (
        -- creates summary for the clustering results joined with original position info
        with summary as (
            select c.{col} as clust_result, pos.uid as uid,
            count(pos.id) as total_points,
            ST_Centroid(ST_union(pos.geom)) as avg_geom
            from {source_table} as pos, {results_table} as c
            where c.id = pos.id
            and c.{col} is not null
            group by clust_result, uid)
        --from the summary, concats cluster id and uid, gets distance, and cross joins
        --to get the closest site for each cluster
        select concat(summary.clust_result::text, '_', summary.uid::text) as clust_id,
        summary.total_points,
        sites.site_id as nearest_site_id,
        sites.port_name as site_name,
        (ST_Distance(sites.geom::geography, summary.avg_geom::geography)/1000) AS nearest_site_dist_km
        from summary
        cross join lateral
        --gets only the nearest port
        (select sites.site_id, sites.port_name, sites.geom
         from sites
         order by sites.geom <-> avg_geom limit 1) as sites
    )
    --aggregates all data for this set of results into one row
    insert into {rollup_table} (name, total_clusters, avg_points,
    average_dist_nearest_port, total_sites, site_names, site_ids)
    select '{col}', count(final.clust_id), avg(final.total_points),
    avg(final.nearest_site_dist_km), count(distinct(final.site_name)),
    array_agg(distinct(final.site_name)), array_agg(distinct(final.nearest_site_id))
    from final
    ;"""
    # run the sql_analyze; try/finally guarantees the cursor and connection
    # are released even if execute() raises (original leaked them on error)
    conn_pooled = gsta.connect_psycopg2(db_config.colone_cargo_params, print_verbose=False)
    c_pooled = conn_pooled.cursor()
    try:
        c_pooled.execute(sql_analyze)
        conn_pooled.commit()
    finally:
        c_pooled.close()
        conn_pooled.close()
    print(f'Completed {col}')
def calc_nn(uid, tree=ball_tree):
    """Find the nearest site for every position of one vessel and store it.

    Reads all positions for ``uid`` from ``{source_table}``, queries the
    prebuilt BallTree for each position's nearest site, and bulk-inserts
    (distance_km, site_id, position_id) rows into ``{target_table}``.

    Parameters
    ----------
    uid : sequence
        One-element sequence whose first item is the vessel uid
        (matches the shape produced by the uid-list query upstream).
    tree : sklearn.neighbors.BallTree, optional
        Haversine BallTree over site coordinates; defaults to the
        module-level ``ball_tree`` (bound once at definition time).

    Uses the module-level names ``source_table``, ``target_table``,
    ``sites``, ``gsta`` and ``db_config``.
    """
    print('Working on uid:', uid[0])
    iteration_start = datetime.datetime.now()
    loc_engine = gsta.connect_engine(db_config.colone_cargo_params, print_verbose=False)
    # Parameterize the uid instead of interpolating it into the SQL string:
    # safe against injection and against uids containing quotes.  The table
    # name must remain an f-string interpolation (identifiers can't be
    # bound as parameters) and comes from trusted module-level config.
    read_sql = f"""SELECT id, lat, lon FROM {source_table} where uid = %(uid)s;"""
    df = pd.read_sql(sql=read_sql, con=loc_engine, params={'uid': uid[0]})
    loc_engine.dispose()
    # Now we are going to use sklearn's BallTree to find the nearest neighbor of
    # each position for the nearest port.  The resulting port_id and dist will be
    # pushed back to the db with the id, uid, and time to be used in the network
    # building phase of analysis.  This takes up more memory, but means we have
    # fewer joins.  Add an index on uid though before running network building.
    # transform to radians (BallTree with haversine metric expects radians)
    points_of_int = np.radians(df.loc[:, ['lat', 'lon']].values)
    # query the tree for the single nearest site of each position
    dist, ind = tree.query(points_of_int, k=1, dualtree=True)
    # make the data list to pass to the sql query:
    # great-circle distance in km (mean Earth radius 6371.0088), site id, position id
    data = np.column_stack(
        (np.round(((dist.reshape(1, -1)[0]) * 6371.0088), decimals=3),
         sites.iloc[ind.reshape(1, -1)[0], :].port_id.values.astype('int'),
         df['id'].values))
    # define the sql statement
    sql_insert = f"INSERT INTO {target_table} (nearest_site_dist_km, nearest_site_id, id) " \
                 "VALUES(%s, %s, %s);"
    # write to db; try/finally guarantees cleanup even if the insert raises
    # (original leaked the connection and cursor on error)
    loc_conn = gsta.connect_psycopg2(db_config.colone_cargo_params, print_verbose=False)
    c = loc_conn.cursor()
    try:
        c.executemany(sql_insert, (data.tolist()))
        loc_conn.commit()
    finally:
        c.close()
        loc_conn.close()
    print(f'UID {uid[0]} complete in:', datetime.datetime.now() - iteration_start)
"VALUES(%s, %s, %s);" # write to db loc_conn = gsta.connect_psycopg2(db_config.colone_cargo_params, print_verbose=False) c = loc_conn.cursor() c.executemany(sql_insert, (data.tolist())) loc_conn.commit() c.close() loc_conn.close() print(f'UID {uid[0]} complete in:', datetime.datetime.now() - iteration_start) #%% Create "nearest_site" table in the database. conn = gsta.connect_psycopg2(db_config.colone_cargo_params) c = conn.cursor() c.execute(f"""DROP TABLE IF EXISTS {target_table}""") conn.commit() c.execute(f"""CREATE TABLE IF NOT EXISTS {target_table} ( id int, nearest_site_id int , nearest_site_dist_km float );""") conn.commit() c.close() conn.close() #%% get uid lists conn = gsta.connect_psycopg2(db_config.colone_cargo_params, print_verbose=False)
import pandas as pd
import networkx as nx
# plotting
import matplotlib.pyplot as plt
# Geo-Spatial Temporal Analysis package
import gsta
import db_config
# reload modules when making edits
from importlib import reload
reload(gsta)
# %%
# open one psycopg2 connection and one SQLAlchemy engine to the cargo db
conn = gsta.connect_psycopg2(db_config.colone_cargo_params)
loc_engine = gsta.connect_engine(db_config.colone_cargo_params)
#%%
# pull every position for one example vessel (uid hard-coded for sampling)
sample = pd.read_sql_query(
    "SELECT id, time, lat, lon FROM ais_cargo.public.uid_positions WHERE uid = '636016432'",
    loc_engine)
#%%
sample.to_csv('sample_ship_posit.csv')
# %% get edgelist from database
# loiter_time is presumably the minimum hours at a site to count as a visit
# — TODO confirm against gsta.get_edgelist
df_edgelist = gsta.get_edgelist(edge_table='cargo_edgelist_3km', engine=loc_engine, loiter_time=8)
# NOTE(review): chunk ends mid-statement — the closing parenthesis of this
# print call is outside the visible source.
print(
    f"{len(df_edgelist)} edges and {len(df_edgelist['Source'].unique())} nodes."
import os #time tracking import datetime from sklearn.neighbors import BallTree from sklearn.metrics.pairwise import haversine_distances #%% Make and test conn and cursor using psycopg, # and create an engine using sql alchemy # Geo-Spatial Temporal Analysis package import gsta import db_config conn = gsta.connect_psycopg2(db_config.loc_cargo_params) loc_engine = gsta.connect_engine(db_config.loc_cargo_params) #%% center and purity calc functions def get_ports_wpi(engine): ports = pd.read_sql( 'wpi', loc_engine, columns=['index_no', 'port_name', 'latitude', 'longitude']) ports = ports.rename(columns={ 'latitude': 'lat', 'longitude': 'lon', 'index_no': 'port_id' })
import os #time tracking import datetime from sklearn.neighbors import BallTree from sklearn.metrics.pairwise import haversine_distances import warnings warnings.filterwarnings('ignore') # Geo-Spatial Temporal Analysis package import gsta aws_conn = gsta.connect_psycopg2(gsta.aws_ais_cluster_params) loc_conn = gsta.connect_psycopg2(gsta.loc_cargo_params) aws_conn.close() loc_conn.close() #%% This function will be used to write results to the database def df_to_table_with_geom(df, name, eps, min_samples, conn): # add the eps and min_samples value to table name new_table_name = ('dbscan_results_' + name + '_' + str(eps).replace('.', '_') + '_' + str(min_samples)) # drop table if an old one exists c = conn.cursor() c.execute("""DROP TABLE IF EXISTS {}""".format(new_table_name)) conn.commit()
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Thu Jan 16 14:14:56 2020 @author: patrickmaus """ import datetime # Geo-Spatial Temporal Analysis package import gsta import db_config aws_conn = gsta.connect_psycopg2(db_config.aws_ais_cluster_params) loc_conn = gsta.connect_psycopg2(db_config.loc_cargo_params) aws_conn.close() loc_conn.close() #%% Create Port Activity table def create_port_activity_table(source_table, destination_table, dist, conn): port_activity_sample_sql = """ -- This SQL query has two with selects and then a final select to create the new table. -- First create the table. Syntax requires its creation before any with clauses. CREATE TABLE {1} AS -- First with clause gets all positions within x meters of any port. Note there are dupes. WITH port_activity as ( SELECT s.id, s.mmsi, s.time, wpi.port_name, wpi.index_no as port_id, (ST_Distance(s.geom::geography, wpi.geog)) as dist_meters FROM {0} AS s JOIN wpi