def set_eindeutige_stop_id(self): """ Setze eindeutige stop_id """ cur = self.conn.cursor() sql = """ -- setze für noch nicht gefundenen H_IDs Haltestellennummern aus verschiedenen Haltestellentabellen UPDATE trips f SET stop_id = h."H_ID" FROM ( SELECT h."H_Name" FROM stops h GROUP BY h."H_Name" HAVING count(*) = 1 ) h1, stops h WHERE h."H_Name" = h1."H_Name" AND f.stop_id IS NULL AND f."H_Name" = h."H_Name"; """ cur.execute(sql) logger.info('eindeutige stop_ids aus Haltestellen: %s' % cur.statusmessage)
def save_haltestellen_id(self): """ save H_ID into stop_id before deleting a trip """ cur = self.conn.cursor() sql = """ -- sichere vor dem Löschen H_ID, die bei doppelten Fahrten gefunden wurden in Spalte stop_id UPDATE trips f SET stop_id = f2."H_ID" FROM ( SELECT a.abfahrt_id_final, f.fahrt_index, max(f."H_ID") AS "H_ID" FROM trips f, departures a WHERE f.abfahrt_id = a.abfahrt_id GROUP BY a.abfahrt_id_final, f.fahrt_index ) f2 WHERE f.abfahrt_id = f2.abfahrt_id_final AND f.fahrt_index = f2.fahrt_index AND f2."H_ID" IS NOT NULL; """ cur.execute(sql) logger.info('save haltestellen_ids: %s' % cur.statusmessage)
def set_stop_id(self): sql = """ UPDATE trips SET stop_id = "H_ID" WHERE "H_ID" IS NOT NULL; UPDATE trips SET stop_id = NULL WHERE stop_id >= 10000000; DELETE FROM stops WHERE "H_ID" >= 10000000; """ cur = self.conn.cursor() cur.execute(sql) logger.info('Set Stop ID: %s' % cur.statusmessage)
def delete_invalid_fahrten(self): sql = """ DELETE FROM trips WHERE abfahrt_id IN ( SELECT f.abfahrt_id FROM trips f LEFT JOIN departures a ON f.abfahrt_id = a.abfahrt_id WHERE a.abfahrt_id IS NULL) ; """ cur = self.conn.cursor() cur.execute(sql) logger.info('Delete invalid trips: %s' % cur.statusmessage)
def test_for_negative_travel_times(self): cur = self.conn.cursor() sql = """ SELECT count(*) FROM ( SELECT *, lag("H_Abfahrt") OVER(PARTITION BY abfahrt_id ORDER BY fahrt_index) AS ab0 FROM trips f )f where "H_Ankunft" < ab0; """ cur.execute(sql) result = cur.fetchone() sql = """ SELECT count(*) FROM trips f where "H_Abfahrt" < "H_Ankunft"; """ cur.execute(sql) result2 = cur.fetchone() if result[0] or result2[0]: logger.warning('''There exist still negative travel times with departures before arrivals!''') if result[0]: sql = """ SELECT abfahrt_id FROM ( SELECT *, lag("H_Abfahrt") OVER(PARTITION BY abfahrt_id ORDER BY fahrt_index) AS ab0 FROM trips f )f WHERE "H_Ankunft" < ab0; """ cur.execute(sql) abfahrten = np.array(cur.fetchall()) logger.warning('%s' % abfahrten) if result2[0]: sql = """ SELECT abfahrt_id FROM trips f WHERE "H_Abfahrt" < "H_Ankunft"; """ cur.execute(sql) abfahrten = np.array(cur.fetchall()) logger.warning('%s' % abfahrten) # raise ValueError('''There exist still negative travel times with # departures before arrivals!''') else: logger.info('no negative travel times')
def copy_layer(self, schema, layer, dest_schema): """ copy layer Parameters ---------- layer : str """ cmd = '{OGR2OGR} -overwrite -geomfield geom -nln {layer} {srid_option} -lco FEATURE_DATASET="{dest_schema}" -f "FileGDB" {path} PG:"host={host} port={port} user={user} dbname={db}" "{schema}.{layer}"' if self.gdbname is None: gdbname = '{db}.gdb'.format(db=self.destination_db) else: gdbname = self.gdbname if not gdbname.endswith('.gdb'): gdbname += '.gdb' # get srid if self.target_srid is None: srid = self.get_target_srid_from_dest_db() srid_option = '-a_srs EPSG:{srid}'.format(srid=srid) else: srid_option = '-t_srs EPSG:{srid}'.format( srid=self.target_srid) folder = os.path.join(self.folder, 'projekte', self.destination_db, 'fgdb', ) self.make_folder(folder) path = os.path.join(folder, gdbname) full_cmd = cmd.format(OGR2OGR=self.OGR2OGRPATH, layer=layer, srid_option=srid_option, path=path, host=self.login.host, port=self.login.port, user=self.login.user, db=self.login.db, schema=schema, dest_schema=dest_schema, ) logger.info(full_cmd) ret = subprocess.call(full_cmd, shell=self.SHELL) if ret: raise IOError('Layer {layer} could not be copied to FGDB'.format(layer=layer))
def reset_ein_aus(self): sql1 = """ UPDATE trips SET "H_Abfahrt" = NULL WHERE ein = FALSE; """ sql2 = """ UPDATE trips SET "H_Ankunft" = NULL WHERE aus = FALSE; """ cur = self.conn.cursor() cur.execute(sql1) logger.info('no Entry: %s' % cur.statusmessage) cur.execute(sql2) logger.info('no Exit: %s' % cur.statusmessage)
def set_departure_to_day_before(self): """ """ sql = """ --setze Abfahrtstag für Ankunftszeiten zwischen 12:00 und 23:59 auf Folgetag UPDATE trips f SET "H_Abfahrt" = "H_Abfahrt" - interval '1 day' FROM --SELECT * FROM trips f, (SELECT abf.abfahrt_id, abf.d0, abf.h0, ank.d1, ank.h1 FROM (SELECT abfahrt_id, h1, d1 FROM ( SELECT abfahrt_id, extract(hour from "H_Ankunft") AS h1 , extract(day from "H_Ankunft") AS d1, "H_Ankunft", fahrt_index, max(fahrt_index) over (PARTITION BY abfahrt_id) AS maxindex FROM trips ) f1 WHERE fahrt_index = maxindex) ank, (SELECT abfahrt_id, extract(hour from "H_Abfahrt") AS h0, extract(day from "H_Abfahrt") AS d0 FROM trips WHERE fahrt_index = 1 ) AS abf, departures a WHERE abf.abfahrt_id = ank.abfahrt_id AND a.abfahrt_id = ank.abfahrt_id -- wenn der Tag gleich ist AND abf.d0 = ank.d1 AND abf.d0 = ank.d1 -- und die Ankunft an Endhaltestelle nach der Abfahrt an der Starthaltestelle erfolgt AND ank.h1 < abf.h0 ORDER BY abf.abfahrt_id) b WHERE f.abfahrt_id = b.abfahrt_id AND extract(hour from "H_Abfahrt") >= 12; """ cur = self.conn.cursor() cur.execute(sql) logger.info('set_departure_to_day_before: %s' % cur.statusmessage)
def shift_trips(self): sql1 = """ -- korrigiere Fahrten über Tagesgrenzen hinaus --verschiebe Fahrten nach vorne, so dass nur Fahrten heute und morgen auftreten UPDATE trips f SET "H_Abfahrt" = "H_Abfahrt" + '1 day'::INTERVAL, "H_Ankunft" = "H_Ankunft" + '1 day'::INTERVAL FROM ( SELECT DISTINCT abfahrt_id FROM trips WHERE date_part('day', "H_Abfahrt") < {today}) v WHERE f.abfahrt_id = v.abfahrt_id ; """.format(today=self.today.day) sql2 = """ --verschiebe Fahrten nach hinten, so dass nur Fahrten heute und morgen auftreten UPDATE trips f SET "H_Abfahrt" = "H_Abfahrt" - '1 day'::INTERVAL, "H_Ankunft" = "H_Ankunft" - '1 day'::INTERVAL FROM ( SELECT DISTINCT abfahrt_id FROM trips WHERE date_part('day', "H_Ankunft") > {tomorrow}) v WHERE f.abfahrt_id = v.abfahrt_id ; """.format(tomorrow=self.today.day + 1) cur = self.conn.cursor() n_updated = 1 while n_updated: cur.execute(sql1) msg = cur.statusmessage n_updated = int(msg.split(' ')[1]) logger.info('Shift Trips forward: %s' % cur.statusmessage) n_updated = 1 while n_updated: cur.execute(sql2) msg = cur.statusmessage n_updated = int(msg.split(' ')[1]) logger.info('Shift Trips backward: %s' % cur.statusmessage)
def copy2pbf(self): """ copy the according schema to a pbf with osmosis """ fn = f'{self.login.db}_{self.network}' folder = os.path.join(self.folder, 'projekte', self.login.db, self.subfolder, ) self.make_folder(folder) file_path = os.path.join(folder, fn) if self.as_xml: to_xml = f' --tee --write-xml file={file_path}.osm.bz2 ' else: to_xml = '' cmd = ('{OSMOSIS} -v ' '--read-pgsql ' 'postgresSchema={schema} ' 'authFile="{authfile}" ' 'host={host}:{port} user={user} database={db} ' '--dataset-dump {to_xml}' '--write-pbf omitmetadata=true file={fn}.osm.pbf') full_cmd = cmd.format(OSMOSIS=self.OSMOSISPATH, schema=self.schema, authfile=self.AUTHFILE, host=self.login.host, port=self.login.port, user=self.login.user, db=self.login.db, fn=file_path, to_xml=to_xml, ) logger.info(full_cmd) ret = subprocess.call(full_cmd, shell=self.SHELL) if ret: layer = 'pbf' raise IOError(f'Layer {layer} could not becopied to pbf-file')
def delete_multiple_abfahrten(self): """ delete abfahrten marked with keep=False """ cur = self.conn.cursor() sql = """ -- lösche doppelte Fahrten DELETE FROM departures WHERE keep = FALSE OR keep IS NULL; """ cur.execute(sql) logger.info('abfahrten deleted: %s' % cur.statusmessage) sql = """ -- remove copied dummy-haltestellen-ids UPDATE trips f SET stop_id = NULL WHERE stop_id > 10000000; """ cur.execute(sql) logger.info('dummy-haltestellen_ids removed: %s' % cur.statusmessage)
def show_multiple_trips(self): cur = self.conn.cursor() sql = ''' SELECT f.*, a.keep FROM trips f, (SELECT abfahrt_id, abfahrt_id_final FROM departures WHERE keep = False) a1, departures a WHERE f.abfahrt_id = a.abfahrt_id AND a1.abfahrt_id_final = a.abfahrt_id_final ORDER BY f."Fahrt_Name", f.abfahrt_id, f.fahrt_index; ''' cur.execute(sql) rows = cur.fetchall() msg = '{0.Fahrt_Name}\t{0.abfahrt_id}\t{0.fahrt_index}\t{0.H_Ankunft} - {0.H_Abfahrt}\t{0.H_ID}\t{0.H_Name}' for row in rows: m = msg.format(row) logger.info(m)
def export_gtfs(self): """ exports the data to the path """ path = os.path.join(self.base_path, self.destination_db, self.subfolder) with Connection(self.login1) as conn: self.conn = conn self.set_search_path() cur = self.conn.cursor() sql = '''SET CLIENT_ENCODING TO '{encoding}';''' encoding = 'UTF8' cur.execute(sql.format(encoding=encoding)) tables = [ 'stops', 'agency', 'stop_times', 'routes', 'trips', 'shapes', 'calendar', 'calendar_dates', 'transfers' ] folder = path.replace('~', os.environ['HOME']) self.make_folder(folder) zipfilename = os.path.join(folder, '{}.zip'.format(self.destination_db)) with zipfile.ZipFile(zipfilename, 'w') as z: for table in tables: tn = 'gtfs_{tn}'.format(tn=table) tablename = '{tn}.txt'.format(tn=table) fn = os.path.join(folder, tablename) logger.info('write {}'.format(fn)) with open(fn, 'w') as f: sql = self.conn.copy_sql.format(tn=tn, fn=fn) logger.info(sql) cur.copy_expert(sql, f) z.write(fn, tablename) os.remove(fn) sql = '''RESET CLIENT_ENCODING;''' cur.execute(sql)
def set_ein_aus(self): sql1 = """ UPDATE trips SET ein = FALSE WHERE "H_Abfahrt" IS NULL """ sql2 = """ UPDATE trips SET aus = FALSE WHERE "H_Ankunft" IS NULL """ cur = self.conn.cursor() cur.execute(sql1) logger.info('no Entry: %s' % cur.statusmessage) cur.execute(sql2) logger.info('no Exit: %s' % cur.statusmessage) sql3 = """ UPDATE trips f SET "H_Abfahrt" = f."H_Ankunft" FROM (SELECT abfahrt_id, fahrt_index FROM (select abfahrt_id, "H_Ankunft", "H_Abfahrt", fahrt_index, max(fahrt_index) over (PARTITION BY abfahrt_id) AS maxindex FROM trips) f1 WHERE fahrt_index < maxindex AND "H_Ankunft" IS NOT NULL AND "H_Abfahrt" IS NULL) ank WHERE ank.abfahrt_id = f.abfahrt_id AND ank.fahrt_index = f.fahrt_index; """ cur.execute(sql3) logger.info('Missing Departure Time added : %s' % cur.statusmessage) sql4 = """ UPDATE trips f SET "H_Ankunft" = "H_Abfahrt" WHERE fahrt_index > 1 AND "H_Ankunft" IS NULL AND "H_Abfahrt" IS NOT NULL; """ cur.execute(sql4) logger.info('Missing Arrival Time added : %s' % cur.statusmessage)
def update_stop_id_from_database(self): """ Update stop_ids aus Deutchlandweiten Daten """ cur = self.conn.cursor() sql = """ -- setze für noch nicht gefundenen H_IDs Haltestellennummern aus verschiedenen Haltestellentabellen UPDATE trips f SET stop_id = h."H_ID" FROM stops h WHERE f.stop_id IS NULL AND f."H_Name" = h."H_Name"; """ sql2 = """ SELECT dblink_connect_u('conn', 'dbname=europe'); -- füge fehlende Haltestelle aus der Deutschland-Tabelle hinzu INSERT INTO stops ("H_ID", "H_Name", geom, kreis) SELECT "H_ID", "H_Name", st_transform(geom, {srid}) AS geom, kreis FROM dblink('conn', 'SELECT h."H_ID", h."H_Name", h.geom, h.kreis FROM timetables.haltestellen AS h') AS hd( "H_ID" integer, "H_Name" text, geom geometry, kreis text) WHERE hd."H_ID" NOT IN (SELECT DISTINCT h."H_ID" FROM stops h) --AND hd."H_ID" IN (SELECT DISTINCT f.stop_id FROM trips AS f) AND hd."H_Name" IN (SELECT DISTINCT f."H_Name" FROM trips AS f); """ query = sql logger.info(query) cur.execute(query) logger.info('{msg}'.format(msg=cur.statusmessage)) query = sql2.format(srid=self.target_srid) logger.info(query) cur.execute(query) logger.info('{msg}'.format(msg=cur.statusmessage))
def count(self): sql = 'SELECT count(*) FROM stops;' cur = self.conn.cursor() cur.execute(sql) rows = cur.fetchall() logger.info('Haltestellen: %s, ' % rows[0][0]) sql = 'SELECT count(*) FROM departures;' cur = self.conn.cursor() cur.execute(sql) rows = cur.fetchall() logger.info('Abfahrten: %s ' % rows[0][0]) sql = 'SELECT count(*) FROM trips;' cur = self.conn.cursor() cur.execute(sql) rows = cur.fetchall() logger.info('Fahrten: %s' % rows[0][0])
def update_stop_id_from_similar_routes(self): """ """ cur = self.conn.cursor() sql = """ -- UPDATE trips f SET stop_id = c.stop1 FROM (SELECT f.abfahrt_id, f.fahrt_index, b.stop1 FROM (SELECT a."Fahrt_Name",a.stopname1, a.stopname0, a.stopname2, a.stop1 FROM ( -- In jeder Zeile steht stop und stopname der Vorhaltestelle, aktuellen Haltestelle und Folgehaltestelle SELECT f."Fahrt_Name", f.abfahrt_id, f.fahrt_index, f.stop_id AS stop1, lag(f.stop_id) OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index) AS stop0, lead(f.stop_id) OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index) AS stop2, f."H_Name" AS stopname1, lag(f."H_Name") OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index) AS stopname0, lead(f."H_Name") OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index) AS stopname2 FROM trips f) AS a -- Suche für jede Fahrt_Name alle eindeutigen Haltestellenfolgen (definiert durch den Haltestellennamen GROUP BY a."Fahrt_Name",a.stopname1, a.stopname0, a.stopname2, a.stop1) b, -- Fahrtentabelle mit den Haltestellennamen der Vorgänger und Nachfolgehaltestellen (SELECT f.abfahrt_id, f.fahrt_index, f."Fahrt_Name", f.stop_id, f."H_Name" AS stopname1, lag(f."H_Name") OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index) AS stopname0, lead(f."H_Name") OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index) AS stopname2 FROM trips AS f ) AS f -- Aktualisier unbekannte stop_id in der Fahrtentabelle aus den bekannten Haltestellenfolgen hinzu WHERE f.stop_id IS NULL AND f."Fahrt_Name" = b."Fahrt_Name" AND f.stopname0 = b.stopname0 AND f.stopname2 = b.stopname2 AND f.stopname1 = b.stopname1 AND b.stop1 IS NOT NULL )c WHERE f.abfahrt_id = c.abfahrt_id AND f.fahrt_index = c.fahrt_index ; """ cur.execute(sql) logger.info('UPDATE stop_id FROM similar routes: %s' % cur.statusmessage) sql = """ -- setze die Haltestellennummer bei Bussen auf die kleinste bekannte "H_ID" für den Haltestellennamen UPDATE trips f SET stop_id = a.stop_id FROM ( SELECT f.abfahrt_id, f.fahrt_index, min(h."H_ID") as stop_id FROM trips f, stops h WHERE f.stop_id IS NULL AND f."H_Name" = h."H_Name" AND f."Fahrt_Name" LIKE 'Bus%' GROUP BY f.abfahrt_id, f.fahrt_index )a WHERE f.abfahrt_id = a.abfahrt_id AND f.fahrt_index = a.fahrt_index; """ cur.execute(sql) logger.info('UPDATE stop_id for busses: %s' % cur.statusmessage) sql = """ -- setze die Haltestellennummer bei Nicht-Bussen auf die größte bekannte "H_ID" für den Haltestellennamen UPDATE trips f SET stop_id = a.stop_id FROM ( SELECT f.abfahrt_id, f.fahrt_index, max(h."H_ID") as stop_id FROM trips f, stops h WHERE f.stop_id IS NULL AND f."H_Name" = h."H_Name" AND f."Fahrt_Name" NOT LIKE 'Bus%' GROUP BY f.abfahrt_id, f.fahrt_index )a WHERE f.abfahrt_id = a.abfahrt_id AND f.fahrt_index = a.fahrt_index; """ cur.execute(sql) logger.info('UPDATE stop_id for busses: %s' % cur.statusmessage) sql = """ -- Setze die Koordinaten in haltestellen auf 0, wenn unbekannte Halteselle -- Wenn keine "H_ID" übergeben wird, wird automatisch eine "H_ID" mit einem -- serial Feld > 50000000 erzeugt INSERT INTO stops ("H_Name", geom) SELECT DISTINCT f."H_Name" AS "H_Name", st_transform(ST_SETSRID(ST_MAKEPOINT(0,0), 4326), {srid}) AS geom FROM trips f WHERE f.stop_id IS NULL; -- setze für Dummy-Haltestellen die stop_id UPDATE trips f SET stop_id = h."H_ID" FROM stops h WHERE f.stop_id IS NULL AND f."H_Name" = h."H_Name"; """ self.run_query(sql.format(srid=self.target_srid)) sql = """ UPDATE trips SET stop_id_txt = stop_id::text; """ self.run_query(sql)
def show_search_path(self): cur = self.conn.cursor() sql = 'show search_path ;' cur.execute(sql) rows = cur.fetchall() logger.info(rows)
def count_multiple_abfahrten(self): """ count the number of occurences of abfahrten and save into abfahrt_id_final """ cur = self.conn.cursor() sql = """ -- zähle die doppelten Fahrten durch UPDATE departures a SET abfahrt_id_final = b.abfahrt_id_final, keep =(b.rn = 1) ::bool FROM ( SELECT a.abfahrt_id, row_number() OVER(PARTITION BY a."Fahrt_Name", a.sh_name, a.anz_haltestellen, a.sh_ab, a.eh_name, a.eh_an, a.zwh_name, a.zwh_an) AS rn, first_value(a.abfahrt_id) OVER(PARTITION BY a."Fahrt_Name", a.sh_name, a.anz_haltestellen, a.sh_ab, a.eh_name, a.eh_an, a.zwh_name, a.zwh_an) AS abfahrt_id_final FROM ( SELECT a.abfahrt_id, a."Fahrt_Name", a."Fahrt_Ziel", a."H_ID", f."H_ID" AS sh_id, f."H_Name" AS sh_name, f."H_Abfahrt" AS sh_ab, f1.fahrt_index AS anz_haltestellen, f1."H_ID" AS eh_id, f1."H_Name" AS eh_name, f1."H_Ankunft" AS eh_an, f2."H_ID" AS zwh_id, f2."H_Name" AS zwh_name, f2."H_Ankunft" AS zwh_an FROM departures a, ( SELECT f.abfahrt_id, f."H_ID", f."H_Ankunft", f."H_Name", f.fahrt_index, row_number() OVER(PARTITION BY f.abfahrt_id ORDER BY f.fahrt_index DESC) rn FROM trips f ) f1, -- letzte Haltestelle der Fahrt row_number() mit ORDER BY fahrt_index descending, also rückwärte trips f, -- erste Haltestelle der Fahrt trips f2 -- zweite Haltestelle der Fahrt WHERE a.abfahrt_id = f.abfahrt_id AND a.abfahrt_id = f1.abfahrt_id AND a.abfahrt_id = f2.abfahrt_id AND f.fahrt_index = 1 -- erste Haltestelle AND f1.rn = 1 --letzte Haltestelle AND f2.fahrt_index = 2 --auch 2. Haltestelle gleich, damit Ringfahrten (U3 Barmbek-Barmbek) nicht aus versehen gelöscht werden ORDER BY a."Fahrt_Name", sh_ab, sh_name ) a ) b WHERE b.abfahrt_id = a.abfahrt_id; """ cur.execute(sql) logger.info('Multiple_abfahrten: %s' % cur.statusmessage) sql = """ -- zähle, wie viele doppelte vorhanden SELECT keep, count(*) FROM departures GROUP BY keep; """ cur.execute(sql) rows = cur.fetchall() msg = 'keep abfahrten: {0.keep}: {0.count}' for row in rows: pass