import collections as coll

from lib import db

def get_neighbors(nid, connection, spatial=True):
    """Get the geospatial neighbors of a node.

    spatial:
        True: use MySQL spatial functions (5.6+)
        False: use minimum bounding rectangles
    """
    fmt = coll.defaultdict(str)
    fmt["nid"] = nid
    if spatial:
        st_fun = "ST_DISTANCE(target.segment, source.segment)"
        fmt["order"] = "ORDER BY " + st_fun
        fmt["geo"] = "ST_"
    else:
        fmt["geo"] = "MBR"

    sql = [
        "SELECT target.id AS nid",
        "FROM operational source, operational target",
        "WHERE {1}INTERSECTS(source.segment, target.segment)",
        "AND source.id = {0} AND target.id <> {0} {2}",
    ]
    sql = db.process(sql, *[fmt[x] for x in ("nid", "geo", "order")])

    with db.DatabaseCursor(connection) as cursor:
        cursor.execute(sql)
        return frozenset([row["nid"] for row in cursor])
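#
# Every snippet here leans on db.process from lib.db. Its definition
# is not shown; judging from the call sites, it joins the SQL
# fragments into one statement and fills the {0}-style placeholders
# via str.format with positional arguments. A minimal sketch under
# that assumption (not the library's actual implementation):
#
def process(sql, *args):
    # Assumed behavior, inferred from usage: join the fragments with
    # spaces, then substitute the positional placeholders.
    return ' '.join(sql).format(*args)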
def get_sql(self, nid):
    sql = [
        'SELECT n.{1} AS nid',
        'FROM network n',
        'LEFT JOIN operational o ON n.{1} = o.id',
        'WHERE n.{2} = {0} AND o.id IS NOT NULL',
    ]

    return db.process(sql, nid, *self.columns)
def __init__(self):
    super().__init__()
    sql = [
        'SELECT n.id AS id, o.id AS valid,',
        'ST_ASTEXT(n.segment) AS segment',
        'FROM node n',
        'LEFT OUTER JOIN operational o ON n.id = o.id',
        'WHERE n.segment IS NOT NULL',
    ]
    self.sql = db.process(sql)
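#
# A note on the query above: the LEFT OUTER JOIN leaves 'valid' NULL
# whenever a node has no matching operational row, so a hypothetical
# consumer of a fetched row can flag non-operational segments
# directly:
#
#     is_operational = row['valid'] is not None
#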
def get_sql(self, nid):
    self.fmt['nid'] = nid
    sql = [
        'SELECT target.id AS nid',
        'FROM operational source, operational target',
        'WHERE {1}INTERSECTS(source.segment, target.segment)',
        'AND source.id = {0} AND target.id <> {0} {2}',
    ]
    params = [self.fmt[x] for x in ('nid', 'geo', 'order')]

    return db.process(sql, *params)
def __get_readings(self, connection):
    sql = [
        'SELECT as_of, speed, travel_time / {1} AS travel',
        'FROM reading',
        'WHERE node = {0}',
        'ORDER BY as_of ASC',
    ]
    # The {0}/{1} placeholders take positional arguments, so the
    # values must be unpacked rather than passed as a single list.
    sql = db.process(sql, self.nid, constant.minute)
    data = pd.read_sql_query(sql, con=connection, index_col='as_of')
    data.columns = ['speed', 'travel']

    return data.resample(self.freq).mean() if self.freq else data
def __get_name(self, connection):
    sql = ["SELECT name", "FROM operational", "WHERE id = {0}"]
    sql = db.process(sql, self.nid)

    with db.DatabaseCursor(connection) as cursor:
        cursor.execute(sql)
        if cursor.rowcount != 1:
            err = "{0} does not exist!".format(self.nid)
            raise AttributeError(err)
        row = cursor.fetchone()

    return row["name"]
def __len__(self):
    if self.length is None:
        with db.DatabaseConnection() as connection:
            with db.DatabaseCursor(connection) as cursor:
                # ST_LENGTH over lat/long coordinates yields degrees;
                # one degree is roughly 69 miles.
                sql = ["SELECT ST_LENGTH(segment) * 69 AS length",
                       "FROM reading",
                       "WHERE id = {0}"]
                sql = db.process(sql, self.nid)
                cursor.execute(sql)
                if cursor.rowcount == 1:
                    row = cursor.fetchone()
                    self.length = float(row["length"])
                else:
                    self.length = -1

    return self.length
def __get_readings(self, connection, speed_threshold=None):
    sql = ["SELECT as_of, speed, travel_time / {1} AS travel",
           "FROM reading",
           "WHERE node = {0}"]

    # speed_threshold is intended to remove outliers. If specified,
    # it restricts the results to speeds below the given fraction
    # above the New York speed limit (65 mph).
    if speed_threshold is not None:
        s = "AND speed < {0}".format(65 * (1 + speed_threshold))
        sql.append(s)
    sql.append("ORDER BY as_of ASC")
    sql = db.process(sql, self.nid, constant.minute)

    data = pd.read_sql_query(sql, con=connection, index_col="as_of")
    data.columns = ["speed", "travel"]

    return data.resample(self.freq).mean() if self.freq else data
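#
# A worked example of the cutoff arithmetic (the threshold value here
# is hypothetical):
#
speed_threshold = 0.2
cutoff = 65 * (1 + speed_threshold)
# cutoff == 78.0, so the query gains "AND speed < 78.0"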
def __init__(self, k=4):
    super().__init__()

    fname = '/Users/jerome/nyc/src/cluster/data/speed/dat.csv'
    self.df = pd.read_csv(fname, index_col='nid')
    self.df = self.df[k]

    color = Color()
    # Every cluster label in [0, max) should be present.
    assert(all([x in self.df.unique() for x in range(self.df.max())]))
    # self.pallete = [color.unique() for _ in self.df.unique()]
    self.pallete = [
        'red',
        'blue',
        'green',
        'orange',
    ]

    sql = [
        'SELECT id, ST_ASTEXT(segment) AS segment',
        'FROM node',
        'WHERE segment IS NOT NULL',
        'AND id IN ({0})',
    ]
    self.sql = db.process(sql, ','.join(map(str, self.df.index)))
    print(self.sql)
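#
# To illustrate the IN-clause construction above (node ids are
# hypothetical):
#
ids = ','.join(map(str, [42, 97, 314]))
# ids == '42,97,314', so the final clause reads: AND id IN (42,97,314)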
from lib import db

#
# Removes values prior to 7 November 2014. Erroneous readings with
# such timestamps are occasionally reported.
#
sql = [
    'DELETE FROM reading',
    "WHERE as_of < '{0}'",
]
sql = db.process(sql, '2014-11-07')

db.EstablishCredentials(user='******')
with db.DatabaseConnection() as connection:
    with db.DatabaseCursor(connection) as cursor:
        cursor.execute(sql)
def __init__(self):
    sql = [
        'SELECT id, ST_ASTEXT(segment) AS segment',
        'FROM node',
        'WHERE segment IS NOT NULL',
    ]
    self.sql = db.process(sql)
# import matplotlib
# matplotlib.style.use('ggplot')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')

from lib import db
from lib import utils
from lib import constant  # assumed home of constant.minute

with db.DatabaseConnection() as con:
    sql = [
        'SELECT frequency / {0} AS freq',
        'FROM quality',
        'ORDER BY frequency ASC',
    ]
    sql = db.process(sql, constant.minute)
    df = pd.read_sql_query(sql, con=con)

# Empirical CDF: the i-th smallest frequency covers (i + 1) / n of
# the segments.
df['dist'] = (np.arange(len(df)) + 1) / len(df)

args = {
    'xlim': (1, 5),
    'ylim': (0, 1),
    'yticks': np.linspace(0, 1, 11),
    'legend': False,
}
plot = df.plot(x='freq', y='dist', **args)
plot.set_xlabel('Reporting frequency (min)')
plot.set_ylabel('Fraction of segments')

utils.mkplot_(plot, 'reporting.png')
import pickle

from lib import db
from lib import cli  # assumed home of CommandLine and optsfile

cargs = cli.CommandLine(cli.optsfile('storage'))
args = cargs.args

#
# Open and parse the data file
#
with open(args.input, mode='rb') as fp:
    data = pickle.load(fp)

keys = []
values = []
for i in data:
    if not keys:
        keys = list(i.keys())
    values.append([i[x] for x in keys])
assert keys and values

#
# Create the SQL statement and execute!
#
s = ['%s'] * len(keys)
opts = [','.join(x) for x in (keys, s)]
sql = ['INSERT IGNORE INTO reading ({0})', 'VALUES ({1})']
sql = db.process(sql, *opts)

db.EstablishCredentials(user='******')
with db.DatabaseConnection() as connection:
    with db.DatabaseCursor(connection) as cursor:
        # http://stackoverflow.com/a/18245311
        cursor.executemany(sql, values)
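#
# To make the string building above concrete, with a hypothetical
# column set:
#
keys = ['node', 'as_of', 'speed']
s = ['%s'] * len(keys)
opts = [','.join(x) for x in (keys, s)]
# opts == ['node,as_of,speed', '%s,%s,%s'], so the processed SQL is:
#   INSERT IGNORE INTO reading (node,as_of,speed) VALUES (%s,%s,%s)
# and executemany applies it once per row in values.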