Ejemplo n.º 1
0
def get_cluster_rollup(col):
    """Summarise one clustering-result column into a single rollup row.

    Builds and executes one SQL statement that:

    1. joins ``source_table`` to ``results_table`` on ``id`` and groups the
       rows by cluster label (``col``) and ``uid``, computing the point count
       and the centroid of the unioned geometries for each group;
    2. finds the single nearest row of ``sites`` for every group centroid
       (``<->`` ordering with ``limit 1``) and its distance in kilometres;
    3. inserts one aggregate row for ``col`` into ``rollup_table``.

    The table names ``source_table``, ``results_table`` and ``rollup_table``
    are read from module scope.

    Args:
        col: Name of the column in ``results_table`` holding the cluster
            labels.  It is interpolated into the SQL text both as an
            identifier and as the value stored in ``name``, so it must come
            from trusted code rather than user input.

    Returns:
        None.  The result is written to ``rollup_table``.
    """
    print(f'Starting {col}...')
    sql_analyze = f"""
    --return clust_id, total points, avg_geom, nearest_site_id, site name and site geom
    with final as (
    -- creates summary for the clustering results joined with original position info
        with summary as (
            select c.{col} as clust_result, pos.uid as uid, count(pos.id) as total_points, 
            ST_Centroid(ST_union(pos.geom)) as avg_geom
            from {source_table} as pos, {results_table} as c
            where c.id = pos.id
            and c.{col} is not null
            group by clust_result, uid)
    --from the summary, concats cluster id and uid, gets distance, and cross joins 
    --to get the closest site for each cluster
    select concat(summary.clust_result::text, '_', summary.uid::text) as clust_id, 
        summary.total_points,
        sites.site_id as nearest_site_id, 
        sites.port_name as site_name,
        (ST_Distance(sites.geom::geography, summary.avg_geom::geography)/1000) AS nearest_site_dist_km
        from summary
    cross join lateral --gets only the nearest port
        (select sites.site_id, sites.port_name, sites.geom
        from sites
        order by sites.geom <-> avg_geom limit 1)
        as sites
        )
    --aggregates all data for this set of results into one row
    insert into {rollup_table} (name, total_clusters, avg_points, average_dist_nearest_port,
                          total_sites, site_names, site_ids)
        select '{col}',
        count(final.clust_id), 
        avg(final.total_points), 
        avg(final.nearest_site_dist_km),
        count(distinct(final.site_name)),
        array_agg(distinct(final.site_name)),
        array_agg(distinct(final.nearest_site_id))
    from final
;"""
    # Each call opens its own connection; the try/finally blocks guarantee
    # the cursor and connection are released even when the statement fails,
    # so a bad column does not leave a connection open.
    conn_pooled = gsta.connect_psycopg2(db_config.colone_cargo_params,
                                        print_verbose=False)
    try:
        c_pooled = conn_pooled.cursor()
        try:
            c_pooled.execute(sql_analyze)
            conn_pooled.commit()
        finally:
            c_pooled.close()
    finally:
        conn_pooled.close()
    print(f'Completed {col}')
Ejemplo n.º 2
0
def calc_nn(uid, tree=ball_tree):
    """Find the nearest site for every position of one uid and store it.

    Reads ``id``, ``lat`` and ``lon`` for the uid from ``source_table``,
    queries ``tree`` for the single nearest neighbour of each position, and
    inserts ``(nearest_site_dist_km, nearest_site_id, id)`` rows into
    ``target_table``.  ``source_table``, ``target_table`` and the ``sites``
    DataFrame are read from module scope.

    Args:
        uid: Indexable whose first element is the uid value (presumably a
            one-column row from a cursor -- confirm against the caller).
        tree: Object with a BallTree-style ``query`` method.  Assumed to be
            built on site coordinates in radians with the haversine metric,
            in the same row order as ``sites`` -- TODO confirm.

    Returns:
        None.  Results are written to ``target_table``.
    """
    print('Working on uid:', uid[0])
    iteration_start = datetime.datetime.now()
    loc_engine = gsta.connect_engine(db_config.colone_cargo_params,
                                     print_verbose=False)
    read_sql = f"""SELECT id, lat, lon
                FROM {source_table}
                where uid= '{uid[0]}';"""
    try:
        df = pd.read_sql(sql=read_sql, con=loc_engine)
    finally:
        # Release the engine's connections even if the read fails.
        loc_engine.dispose()

    if df.empty:
        # A uid with no rows has nothing to query or insert; return early
        # instead of handing an empty array to the tree.
        print(f'UID {uid[0]} has no positions, skipping.')
        return

    # The nearest site id and distance are written back per position id so
    # later network-building steps need fewer joins (at the cost of storage).
    # transform to radians
    points_of_int = np.radians(df.loc[:, ['lat', 'lon']].values)
    # query the tree; with k=1 both results have shape (n, 1)
    dist, ind = tree.query(points_of_int, k=1, dualtree=True)
    nearest_idx = ind.ravel()
    # 6371.0088 scales the tree's distances to kilometres (assumes they are
    # angular distances in radians and the constant is Earth's mean radius).
    dist_km = np.round(dist.ravel() * 6371.0088, decimals=3)
    # make the data list to pass to the sql query
    data = np.column_stack(
        (dist_km,
         sites.iloc[nearest_idx, :].port_id.values.astype('int'),
         df['id'].values))
    # define the sql statement
    sql_insert = f"INSERT INTO {target_table} (nearest_site_dist_km, nearest_site_id, id) " \
                 "VALUES(%s, %s, %s);"

    # write to db; always close the cursor and connection, even on failure
    loc_conn = gsta.connect_psycopg2(db_config.colone_cargo_params,
                                     print_verbose=False)
    try:
        c = loc_conn.cursor()
        try:
            c.executemany(sql_insert, (data.tolist()))
            loc_conn.commit()
        finally:
            c.close()
    finally:
        loc_conn.close()
    print(f'UID {uid[0]} complete in:',
          datetime.datetime.now() - iteration_start)
Ejemplo n.º 3
0
                 "VALUES(%s, %s, %s);"

    # write to db
    loc_conn = gsta.connect_psycopg2(db_config.colone_cargo_params,
                                     print_verbose=False)
    c = loc_conn.cursor()
    c.executemany(sql_insert, (data.tolist()))
    loc_conn.commit()
    c.close()
    loc_conn.close()
    print(f'UID {uid[0]} complete in:',
          datetime.datetime.now() - iteration_start)


#%% Create "nearest_site" table in the database.
# Rebuild the target table from scratch: drop any previous copy, then create
# an empty one with the three columns this script inserts into.
conn = gsta.connect_psycopg2(db_config.colone_cargo_params)
c = conn.cursor()
c.execute(f"""DROP TABLE IF EXISTS {target_table}""")
# Commit the drop on its own before issuing the create.
conn.commit()
c.execute(f"""CREATE TABLE IF NOT EXISTS {target_table}
(   id int,
    nearest_site_id int ,
    nearest_site_dist_km float
);""")
conn.commit()
c.close()
conn.close()

#%% get uid lists
# Open a fresh connection (quiet mode) for this cell; ``conn`` is rebound
# here because the previous connection was closed above.
conn = gsta.connect_psycopg2(db_config.colone_cargo_params,
                             print_verbose=False)
Ejemplo n.º 4
0
import pandas as pd
import networkx as nx

# plotting
import matplotlib.pyplot as plt

# Geo-Spatial Temporal Analysis package
import gsta
import db_config

# reload modules when making edits
from importlib import reload

# Re-import gsta so edits made to the package during an interactive session
# take effect without restarting the interpreter.
reload(gsta)
# %%
# One raw psycopg2 connection and one engine, both from the same parameters;
# the engine is what the pandas/gsta calls below are given.
conn = gsta.connect_psycopg2(db_config.colone_cargo_params)
loc_engine = gsta.connect_engine(db_config.colone_cargo_params)

#%%
# Pull every position for a single hard-coded uid.
sample = pd.read_sql_query(
    "SELECT id, time, lat, lon FROM ais_cargo.public.uid_positions WHERE uid = '636016432'",
    loc_engine)
#%%
# Written relative to the current working directory; the DataFrame index is
# included as the first CSV column (pandas default).
sample.to_csv('sample_ship_posit.csv')
# %% get edgelist from database

# NOTE(review): units of loiter_time are not visible here -- confirm in gsta.
df_edgelist = gsta.get_edgelist(edge_table='cargo_edgelist_3km',
                                engine=loc_engine,
                                loiter_time=8)
print(
    f"{len(df_edgelist)} edges and {len(df_edgelist['Source'].unique())} nodes."
import os

#time tracking
import datetime

from sklearn.neighbors import BallTree
from sklearn.metrics.pairwise import haversine_distances

#%% Make and test conn and cursor using psycopg,
# and create an engine using sql alchemy

# Geo-Spatial Temporal Analysis package
import gsta
import db_config

# Module-level handles built from the same parameter set: a raw psycopg2
# connection and an engine for pandas reads.
conn = gsta.connect_psycopg2(db_config.loc_cargo_params)
loc_engine = gsta.connect_engine(db_config.loc_cargo_params)

#%% center and purity calc functions


def get_ports_wpi(engine):
    """Load port records from the ``wpi`` table with normalised column names.

    Reads the ``index_no``, ``port_name``, ``latitude`` and ``longitude``
    columns and renames them to ``port_id``, ``port_name``, ``lat`` and
    ``lon``.

    Args:
        engine: Database connectable passed to ``pandas.read_sql``.
    """
    # Read through the engine supplied by the caller rather than a
    # module-level one, so the function works with whichever database the
    # caller is connected to.
    ports = pd.read_sql(
        'wpi',
        engine,
        columns=['index_no', 'port_name', 'latitude', 'longitude'])
    ports = ports.rename(columns={
        'latitude': 'lat',
        'longitude': 'lon',
        'index_no': 'port_id'
    })
Ejemplo n.º 6
0
import os

#time tracking
import datetime

from sklearn.neighbors import BallTree
from sklearn.metrics.pairwise import haversine_distances

import warnings

# Silences every warning category for the rest of the process.
warnings.filterwarnings('ignore')

# Geo-Spatial Temporal Analysis package
import gsta

# Open both connections and close them straight away; nothing is executed on
# them, so this presumably serves as a connectivity check -- confirm intent.
aws_conn = gsta.connect_psycopg2(gsta.aws_ais_cluster_params)
loc_conn = gsta.connect_psycopg2(gsta.loc_cargo_params)
aws_conn.close()
loc_conn.close()


#%% This function will be used to write results to the database
def df_to_table_with_geom(df, name, eps, min_samples, conn):
    # add the eps and min_samples value to table name
    new_table_name = ('dbscan_results_' + name + '_' +
                      str(eps).replace('.', '_') + '_' + str(min_samples))

    # drop table if an old one exists
    c = conn.cursor()
    c.execute("""DROP TABLE IF EXISTS {}""".format(new_table_name))
    conn.commit()
Ejemplo n.º 7
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 16 14:14:56 2020

@author: patrickmaus
"""

import datetime

# Geo-Spatial Temporal Analysis package
import gsta
import db_config

# Open both connections and close them straight away; nothing is executed on
# them, so this presumably serves as a connectivity check -- confirm intent.
aws_conn = gsta.connect_psycopg2(db_config.aws_ais_cluster_params)
loc_conn = gsta.connect_psycopg2(db_config.loc_cargo_params)
aws_conn.close()    
loc_conn.close()
#%% Create Port Activity table 
def create_port_activity_table(source_table, destination_table, dist, conn):
    
    port_activity_sample_sql = """
    -- This SQL query has two with selects and then a final select to create the new table.
    -- First create the table.  Syntax requires its creation before any with clauses.
    CREATE TABLE {1} AS
    -- First with clause gets all positions within x meters of any port.  Note there are dupes.
    WITH port_activity as (
    		SELECT s.id, s.mmsi, s.time, wpi.port_name, wpi.index_no as port_id,
    		(ST_Distance(s.geom::geography, wpi.geog)) as dist_meters
    		FROM {0} AS s
    		JOIN wpi