Esempio n. 1
0
def calc_nn(uid, tree=ball_tree):
    print('Working on uid:', uid[0])
    iteration_start = datetime.datetime.now()
    loc_engine = gsta.connect_engine(db_config.colone_cargo_params,
                                     print_verbose=False)
    read_sql = f"""SELECT id, lat, lon
                FROM {source_table}
                where uid= '{uid[0]}';"""
    df = pd.read_sql(sql=read_sql, con=loc_engine)
    loc_engine.dispose()
    # Now we are going to use sklearn's BallTree to find the nearest neighbor of
    # each position for the nearest port.  The resulting port_id and dist will be
    # pushed back to the db with the id, uid, and time to be used in the network
    # building phase of analysis.  This takes up more memory, but means we have
    # fewer joins.  Add an index on uid though before running network building.
    # transform to radians
    points_of_int = np.radians(df.loc[:, ['lat', 'lon']].values)
    # query the tree
    dist, ind = tree.query(points_of_int, k=1, dualtree=True)
    # make the data list to pass to the sql query
    data = np.column_stack(
        (np.round(((dist.reshape(1, -1)[0]) * 6371.0088), decimals=3),
         sites.iloc[ind.reshape(1, -1)[0], :].port_id.values.astype('int'),
         df['id'].values))
    # define the sql statement
    sql_insert = f"INSERT INTO {target_table} (nearest_site_dist_km, nearest_site_id, id) " \
                 "VALUES(%s, %s, %s);"

    # write to db
    loc_conn = gsta.connect_psycopg2(db_config.colone_cargo_params,
                                     print_verbose=False)
    c = loc_conn.cursor()
    c.executemany(sql_insert, (data.tolist()))
    loc_conn.commit()
    c.close()
    loc_conn.close()
    print(f'UID {uid[0]} complete in:',
          datetime.datetime.now() - iteration_start)
#time tracking
import datetime

from sklearn.neighbors import BallTree
from sklearn.metrics.pairwise import haversine_distances

#%% Make and test conn and cursor using psycopg,
# and create an engine using sql alchemy

# Geo-Spatial Temporal Analysis package
import gsta
import db_config

conn = gsta.connect_psycopg2(db_config.loc_cargo_params)
loc_engine = gsta.connect_engine(db_config.loc_cargo_params)

#%% center and purity calc functions


def get_ports_wpi(engine):
    ports = pd.read_sql(
        'wpi',
        loc_engine,
        columns=['index_no', 'port_name', 'latitude', 'longitude'])
    ports = ports.rename(columns={
        'latitude': 'lat',
        'longitude': 'lon',
        'index_no': 'port_id'
    })
    return ports
Esempio n. 3
0
import numpy as np
import datetime
from multiprocessing import Pool
import os
from sklearn.neighbors import BallTree

cores = os.cpu_count()
workers = cores - 1
print(
    f'This machine has {cores} cores.  Will use {workers} for multiprocessing.'
)

# set tables for processing
source_table = 'ship_sample'
target_table = 'nearest_sample'
engine = gsta.connect_engine(db_config.colone_cargo_params)
#%% get the sits as a df from the database
sites = gsta.get_sites(engine)
engine.dispose()

# build the BallTree using the ports as the candidates
candidates = np.radians(sites.loc[:, ['lat', 'lon']].values)
ball_tree = BallTree(candidates, leaf_size=40, metric='haversine')


def calc_nn(uid, tree=ball_tree):
    print('Working on uid:', uid[0])
    iteration_start = datetime.datetime.now()
    loc_engine = gsta.connect_engine(db_config.colone_cargo_params,
                                     print_verbose=False)
    read_sql = f"""SELECT id, lat, lon
# reload modules when making edits
from importlib import reload

reload(gsta)

#%%
df = pd.read_csv('sample_ship_posits.csv')
df['time'] = pd.to_datetime(df['time'])
try:
    df.drop(['Unnamed: 0'], inplace=True, axis=1)
except Exception as e:
    print (e)
# %%
# conn = gsta.connect_psycopg2(db_config.colone_cargo_params)
loc_engine = gsta.connect_engine(db_config.colone_cargo_params, print_verbose=False)

# %% create a df from the database
df = pd.read_sql_query("SELECT id, time, lat, lon, cog, sog, status, anchored, moored, underway"
                       " FROM ais_cargo.public.uid_positions "
                       "WHERE uid = '636016432'"
                       "ORDER BY time", loc_engine)
#df.to_csv('sample_ship_posits.csv', index=False)

#%%
def calc_dist(df, unit='nm'):
    """
    Takes a df with id, lat, and lon and returns the distance  between
    the previous point to the current point as a series.
    :param df:
    :return: