def _write_stats(self):
    G = GTFS(self.day_db_path)
    net = combined_stop_to_stop_transit_network(G)
    sections = net.edges(data=True)
    n_links = len(sections)
    section_lengths = []
    vehicle_kilometers_per_section = []
    for from_I, to_I, data in sections:
        section_lengths.append(data['d'])
        vehicle_kilometers_per_section.append(data['n_vehicles'] * data['d'] / 1000.)
    stats = {"n_stops": len(G.stops(require_reference_in_stop_times=True)),
             "n_connections": len(G.get_transit_events()),
             "n_links": n_links,
             "network_length_m": sum(section_lengths),
             "link_distance_avg_m": int(sum(section_lengths) / len(section_lengths)),
             "vehicle_kilometers": sum(vehicle_kilometers_per_section),
             "buffer_center_lat": self.lat,
             "buffer_center_lon": self.lon,
             "buffer_radius_km": self.buffer_distance,
             "extract_start_date": self.get_weekly_extract_start_date().strftime("%Y-%m-%d")}
    self.__verify_stats(stats)
    df = pandas.DataFrame.from_dict({key: [value] for key, value in stats.items()})
    df.to_csv(self.stats_fname, sep=";", columns=list(sorted(stats.keys())), index=False)
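# A minimal, self-contained sketch of the per-link statistics computed above,
# using a hypothetical two-edge network instead of a real gtfspy database.
# Each edge carries 'd' (link length in meters) and 'n_vehicles' (vehicle
# count over the extract), so vehicle kilometers per link are
# n_vehicles * d / 1000.
def _example_link_stats():
    sections = [
        (1, 2, {"d": 500.0, "n_vehicles": 10}),   # 10 vehicles over a 0.5 km link
        (2, 3, {"d": 1500.0, "n_vehicles": 4}),   # 4 vehicles over a 1.5 km link
    ]
    lengths = [data["d"] for _, _, data in sections]
    veh_km = [data["n_vehicles"] * data["d"] / 1000. for _, _, data in sections]
    assert sum(lengths) == 2000.0     # network_length_m
    assert sum(veh_km) == 5.0 + 6.0   # vehicle_kilometers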
def setUp(self):
    self.gtfs_source_dir = os.path.join(os.path.dirname(__file__), "test_data")
    self.gtfs_source_dir_filter_test = os.path.join(self.gtfs_source_dir, "filter_test_feed/")
    # self.G = GTFS.from_directory_as_inmemory_db(self.gtfs_source_dir)

    # some preparations:
    self.fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
    self.fname_copy = self.gtfs_source_dir + "/test_gtfs_copy.sqlite"
    self.fname_filter = self.gtfs_source_dir + "/test_gtfs_filter_test.sqlite"
    self._remove_temporary_files()
    self.assertFalse(os.path.exists(self.fname_copy))
    conn = sqlite3.connect(self.fname)
    import_gtfs(self.gtfs_source_dir, conn, preserve_connection=True, print_progress=False)
    conn_filter = sqlite3.connect(self.fname_filter)
    import_gtfs(self.gtfs_source_dir_filter_test, conn_filter, preserve_connection=True, print_progress=False)
    self.G = GTFS(conn)
    self.G_filter_test = GTFS(conn_filter)
    self.hash_orig = hashlib.md5(open(self.fname, 'rb').read()).hexdigest()
def test_shape_break_order_1(self, trip_I=73775):
    """Regression test for a bug related to shape alignment."""
    conn = GTFS('../scratch/db/hsl-2015-07-12.sqlite').conn
    cur = conn.cursor()
    cur.execute('''SELECT seq, lat, lon
                   FROM stop_times LEFT JOIN stops USING (stop_I)
                   WHERE trip_I=?
                   ORDER BY seq''', (trip_I,))
    stop_points = [dict(seq=row[0], lat=row[1], lon=row[2]) for row in cur]

    # Get the shape points
    shape_id = cur.execute('''SELECT shape_id FROM trips WHERE trip_I=?''',
                           (trip_I,)).fetchone()[0]
    shape_points = shapes.get_shape_points(cur, shape_id)
    breakpoints, badness = shapes.find_segments(stop_points, shape_points)
    print(badness)
    if badness > 30:
        print("bad shape fit: %s (%s, %s)" % (badness, trip_I, shape_id))
    # the breakpoints should be monotonically increasing along the shape
    for b1, b2 in zip(breakpoints, sorted(breakpoints)):
        self.assertEqual(b1, b2)
def test_frequencyLoader(self):
    import_gtfs(self.fdict, self.conn, preserve_connection=True)
    # "\nfrequency_route, freq_service, freq_trip, going north, freq_name, shape_es1"
    keys = ["trip_I", "start_time", "end_time", "headway_secs", "exact_times",
            "start_time_ds", "end_time_ds"]
    self.setDictConn()
    rows = self.conn.execute("SELECT * FROM frequencies").fetchall()
    row = rows[0]
    for key in keys:
        assert key in row
    for row in rows:
        if row["start_time_ds"] == 14 * 3600:
            self.assertEqual(row["exact_times"], 1)
    # there should be twelve trips with the service_id 'freq_service'
    count = self.conn.execute("SELECT count(*) AS count FROM trips JOIN calendar "
                              "USING(service_I) WHERE service_id='freq_service'").fetchone()['count']
    assert count == 12, count
    rows = self.conn.execute("SELECT trip_I FROM trips JOIN calendar "
                             "USING(service_I) WHERE service_id='freq_service'").fetchall()
    for row in rows:
        trip_I = row['trip_I']
        res = self.conn.execute("SELECT * FROM stop_times WHERE trip_I={trip_I}".format(trip_I=trip_I)).fetchall()
        assert len(res) > 1, res
    self.setRowConn()
    g = GTFS(self.conn)
    print("Stop times: \n\n ", g.get_table("stop_times"))
    print("Frequencies: \n\n ", g.get_table("frequencies"))
def _correct_coordinates_for_raw_db(self):
    g = GTFS(self.raw_db_path)
    df = self.coordinate_corrections
    for feed in self.feeds:
        feed_df = df.loc[df["feed"] == feed]
        print("Updating", len(feed_df.index), "known stop coordinate errors")
        g.update_stop_coordinates(feed_df)
def __init__(self, gtfssource, gtfs, verbose=True):
    """
    Parameters
    ----------
    gtfssource: str, dict, or list thereof
        path(s) to the GTFS source data, or a dict containing the GTFS data directly
    gtfs: gtfspy.gtfs.GTFS, or path to a relevant .sqlite GTFS database
    verbose: bool
        Whether or not to print warnings on-the-fly.
    """
    if isinstance(gtfssource, string_types + (dict,)):
        self.gtfs_sources = [gtfssource]
    else:
        assert isinstance(gtfssource, list)
        self.gtfs_sources = gtfssource
    assert len(self.gtfs_sources) > 0, \
        "There needs to be at least one source file for validating an import"
    if not isinstance(gtfs, GTFS):
        self.gtfs = GTFS(gtfs)
    else:
        self.gtfs = gtfs
    self.location = self.gtfs.get_location_name()
    self.warnings_container = WarningsContainer()
    self.verbose = verbose
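# Hypothetical usage sketch (in gtfspy the corresponding class is
# ImportValidator; the paths below are assumptions):
#
#   validator = ImportValidator("/path/to/gtfs_source_dir",
#                               "/path/to/imported.sqlite")
#
# A single source may be passed as a plain string or dict; the constructor
# normalizes it into a one-element list before validation.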
def validate_day_start_ut(conn):
    """This validates the day_start_ut of the days table."""
    G = GTFS(conn)
    cur = conn.execute('SELECT date, day_start_ut FROM days')
    for date, day_start_ut in cur:
        assert day_start_ut == G.get_day_start_ut(date)
def get_stop_I_by_stop_id(stop_id):
    from gtfspy.gtfs import GTFS
    g = GTFS(IMPORTED_DATABASE_PATH)
    # use a parameterized query instead of string concatenation to avoid
    # SQL-injection and quoting problems
    row = g.conn.execute("SELECT stop_I FROM stops WHERE stop_id=?",
                         (str(stop_id),)).fetchone()
    return row[0]
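# Usage sketch (assumes IMPORTED_DATABASE_PATH points to a database imported
# with gtfspy, and that the feed contains a stop with this stop_id; "AMV" is
# a made-up example value):
#
#   stop_I = get_stop_I_by_stop_id("AMV")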
def test_filter_spatially(self):
    # test that the db is split by a given spatial boundary
    FilterExtract(self.G, self.fname_copy,
                  buffer_lat=36.914893,
                  buffer_lon=-116.76821,
                  buffer_distance_km=50).create_filtered_copy()
    G_copy = GTFS(self.fname_copy)

    stops_table = G_copy.get_table("stops")
    self.assertNotIn("FUR_CREEK_RES", stops_table['stop_id'].values)
    self.assertIn("AMV", stops_table['stop_id'].values)
    self.assertEqual(len(stops_table['stop_id'].values), 8)

    conn_copy = sqlite3.connect(self.fname_copy)
    stop_ids_df = pandas.read_sql('SELECT stop_id FROM stop_times '
                                  'LEFT JOIN stops '
                                  'ON stops.stop_I = stop_times.stop_I',
                                  conn_copy)
    stop_ids = stop_ids_df["stop_id"].values
    self.assertNotIn("FUR_CREEK_RES", stop_ids)
    self.assertIn("AMV", stop_ids)

    trips_table = G_copy.get_table("trips")
    self.assertNotIn("BFC1", trips_table['trip_id'].values)

    routes_table = G_copy.get_table("routes")
    self.assertNotIn("BFC", routes_table['route_id'].values)
def __init__(self, journey_db_path, gtfs_path):
    assert os.path.isfile(journey_db_path)
    assert os.path.isfile(gtfs_path)
    self.conn = sqlite3.connect(journey_db_path)
    self.g = GTFS(gtfs_path)
    self.gtfs_path = gtfs_path
    self.conn = attach_database(self.conn, self.gtfs_path)
def test_filter_end_date_not_included(self):
    # the end date should not be included:
    FilterExtract(self.G, self.fname_copy,
                  start_date="2007-01-02",
                  end_date="2010-12-31").create_filtered_copy()
    hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
    self.assertNotEqual(self.hash_orig, hash_copy)
    G_copy = GTFS(self.fname_copy)
    dsut_end = G_copy.get_day_start_ut("2010-12-31")
    dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(dsut_end, dsut_end + 24 * 3600)
    self.assertEqual(len(dsut_to_trip_I), 0)

    calendar_copy = G_copy.get_table("calendar")
    max_date_calendar = max([datetime.datetime.strptime(el, "%Y-%m-%d")
                             for el in calendar_copy["end_date"].values])
    min_date_calendar = min([datetime.datetime.strptime(el, "%Y-%m-%d")
                             for el in calendar_copy["start_date"].values])
    end_date_not_included = datetime.datetime.strptime("2010-12-31", "%Y-%m-%d")
    start_date_not_included = datetime.datetime.strptime("2007-01-01", "%Y-%m-%d")
    self.assertLess(max_date_calendar, end_date_not_included,
                    msg="the last date should not be included in calendar")
    self.assertLess(start_date_not_included, min_date_calendar)
    os.remove(self.fname_copy)
def __init__(self, gtfssource, gtfs):
    """
    Parameters
    ----------
    gtfssource: str, dict, or list thereof
        path(s) to the GTFS source data, or a dict containing the GTFS data directly
    gtfs: GTFS, or path to a GTFS database
    """
    self.df_freq_dict = {}
    if isinstance(gtfssource, string_types + (dict,)):
        self.gtfs_sources = [gtfssource]
    else:
        assert isinstance(gtfssource, list)
        self.gtfs_sources = gtfssource
    assert len(self.gtfs_sources) > 0, \
        "There needs to be at least one source file for validating an import"
    if not isinstance(gtfs, GTFS):
        self.gtfs = GTFS(gtfs)
    else:
        self.gtfs = gtfs
    self.location = self.gtfs.get_location_name()
    self.warnings_container = WarningsContainer()
def add_extra_locations_to_stops_table():
    g = GTFS(IMPORTED_DATABASE_PATH)
    for location in EXTRA_LOCATIONS:
        stop_id = location['id']  # renamed from `id` to avoid shadowing the builtin
        lat = location['lat']
        lon = location['lon']
        g.add_stop(stop_id, "", stop_id.replace("ADDED_", ""), "", lat, lon)
def main_make_views(gtfs_fname):
    """Re-create all views."""
    print("creating views")
    conn = GTFS(fname_or_conn=gtfs_fname).conn
    for L in Loaders:
        L(None).make_views(conn)
    conn.commit()
def __init__(self, gtfs_name):
    if isinstance(gtfs_name, str):
        self.gtfs = GTFS(FEED_DICT[gtfs_name]["gtfs_dir"])
    else:
        self.gtfs = gtfs_name
    self.bunching_value = 99
    self.line_spacing = 0.0001
    self.shapes = False
    self.crs_wgs = {'init': 'epsg:4326'}
def __init__(self, gtfs_path, journey_db_path, routing_params=None,
             multitarget_routing=False, track_vehicle_legs=True, track_route=False):
    """
    :param gtfs_path: path to the GTFS database
    :param journey_db_path: path to the journey database (created if it does not exist yet)
    :param routing_params: dict of routing parameters, optional
    :param multitarget_routing: bool
    :param track_vehicle_legs: bool
    :param track_route: bool
    """
    self.multitarget_routing = multitarget_routing
    self.track_route = track_route
    self.track_vehicle_legs = track_vehicle_legs
    self.gtfs_path = gtfs_path
    self.gtfs = GTFS(self.gtfs_path)
    self.gtfs_meta = self.gtfs.meta
    self.gtfs._dont_close = True
    self.od_pairs = None
    self._targets = None
    self._origins = None
    self.diff_conn = None
    if not routing_params:
        routing_params = dict()
    self.routing_params_input = routing_params

    assert os.path.exists(journey_db_path) or routing_params is not None
    journey_db_pre_exists = os.path.isfile(journey_db_path)

    # use a generous timeout so that concurrent writers do not fail immediately:
    timeout = 100
    self.conn = sqlite3.connect(journey_db_path, timeout)
    if not journey_db_pre_exists:
        self.initialize_database()

    self.routing_parameters = Parameters(self.conn)
    self._assert_journey_computation_paramaters_match()

    self.journey_properties = {"journey_duration": (_T_WALK_STR, _T_WALK_STR)}
    if routing_params.get('track_vehicle_legs', False) or \
            self.routing_parameters.get('track_vehicle_legs', False):
        self.journey_properties["n_boardings"] = (float("inf"), 0)
    if self.track_route:
        additional_journey_parameters = {
            "in_vehicle_duration": (float('inf'), 0),
            "transfer_wait_duration": (float('inf'), 0),
            "walking_duration": (_T_WALK_STR, _T_WALK_STR),
            "pre_journey_wait_fp": (float('inf'), 0)
        }
        self.journey_properties.update(additional_journey_parameters)
    self.travel_impedance_measure_names = list(self.journey_properties.keys())
    self.travel_impedance_measure_names += ["temporal_distance"]
def add_swimming_halls_to_stops_table():
    g = GTFS(IMPORTED_DATABASE_PATH)
    halls = get_swimming_hall_data()
    for hall in halls:
        lat = hall['latitude']
        lon = hall['longitude']
        name = hall['name_en'].replace(" ", "_")
        stop_id = SWIMMING_HALL_ID_PREFIX + name + "_" + str(hall['id'])
        g.add_stop(stop_id, "NULL", name, "NULL", lat, lon)
def __init__(self, gtfs_path, before_db_path, after_db_path, output_db):
    self.gtfs = GTFS(gtfs_path)
    print(output_db)
    self._create_indecies(before_db_path)
    self._create_indecies(after_db_path)
    self.conn = sqlite3.connect(output_db)
    self.conn = attach_database(self.conn, before_db_path, name="before")
    self.conn = attach_database(self.conn, after_db_path, name="after")
def stops_to_exclude(return_sqlite_list=False):
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])
    # stops whose leading area digit is greater than 4 are excluded;
    # ids prefixed with '__b__' carry the digit right after the prefix
    areas_to_remove = gtfs_lm.execute_custom_query_pandas(
        "SELECT * FROM stops "
        "WHERE CASE WHEN substr(stop_id, 1, 5) = '__b__' "
        "THEN CAST(substr(stop_id, 6, 1) AS integer) "
        "ELSE CAST(substr(stop_id, 1, 1) AS integer) END > 4")
    if return_sqlite_list:
        return "(" + ",".join(str(x) for x in areas_to_remove["stop_I"].tolist()) + ")"
    return areas_to_remove
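# The SQL CASE above parses a leading area digit out of stop_id: ids starting
# with the literal prefix "__b__" carry the digit in character position 6
# (SQLite substr is 1-indexed), all others in position 1, and areas > 4 are
# excluded. A pure-Python sketch of the same rule (the stop ids below are
# made up):
def _area_digit(stop_id):
    digit = stop_id[5] if stop_id.startswith("__b__") else stop_id[0]
    return int(digit)

assert _area_digit("__b__5123") == 5   # excluded (> 4)
assert _area_digit("3123") == 3        # kept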
def _export_transfers(conn, fname):
    conn = GTFS(conn).conn
    cur = conn.cursor()
    cur.execute('SELECT S1.lat, S1.lon, S2.lat, S2.lon, SD.d '
                'FROM stop_distances SD '
                ' LEFT JOIN stops S1 ON (SD.from_stop_I=S1.stop_I) '
                ' LEFT JOIN stops S2 ON (SD.to_stop_I =S2.stop_I)')
    with open(fname, 'w') as f:
        for row in cur:
            print(' '.join(str(x) for x in row), file=f)
def post_import2(self, conn):
    # TODO: multiple feeds are possible, but this currently selects only one
    # row of feed_info for all feeds.
    G = GTFS(conn)
    for name in ['feed_publisher_name', 'feed_publisher_url', 'feed_lang',
                 'feed_start_date', 'feed_end_date', 'feed_version']:
        value = conn.execute('SELECT %s FROM feed_info' % name).fetchone()[0]
        if value:
            G.meta['feed_info_' + name] = value
def __init__(self, gtfs, buffer_params=None):
    """
    Parameters
    ----------
    gtfs: GTFS, or path to a GTFS database
    buffer_params: dict, optional
    """
    if not isinstance(gtfs, GTFS):
        self.gtfs = GTFS(gtfs)
    else:
        self.gtfs = gtfs
    self.buffer_params = buffer_params
    self.warnings_container = WarningsContainer()
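# Hypothetical usage sketch (in gtfspy this constructor belongs to
# TimetableValidator; the path below is an assumption):
#
#   validator = TimetableValidator("/path/to/imported.sqlite")
#   warnings = validator.warnings_container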
def __create_temporal_extract_from_main_db(self, days, output_db_path):
    if os.path.isfile(output_db_path):
        os.remove(output_db_path)
    main_G = GTFS(self.main_db_path)
    assert isinstance(main_G, GTFS)
    day_extract_date_start = self.get_weekly_extract_start_date()
    start_date_ut = main_G.get_day_start_ut(day_extract_date_start)
    three_am_seconds = 3 * 3600
    fe = filter.FilterExtract(main_G,
                              output_db_path,
                              update_metadata=True,
                              trip_earliest_start_time_ut=start_date_ut + three_am_seconds,  # inclusive
                              trip_latest_start_time_ut=start_date_ut + three_am_seconds + days * 24 * 3600)  # exclusive
    fe.create_filtered_copy()
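# The filtering window above runs from 03:00 on the extract start date
# (inclusive) to 03:00 `days` days later (exclusive). A small worked example
# with made-up numbers:
#
#   start_date_ut = 1_500_000_000                  # hypothetical day start (midnight)
#   days = 7
#   window_start = start_date_ut + 3 * 3600        # day 1, 03:00 (inclusive)
#   window_end = window_start + days * 24 * 3600   # day 8, 03:00 (exclusive)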
def test_filter_by_start_and_end_full_range(self):
    # untested tables with filtering: stops, shapes
    # test filtering by start and end time, copying the full date range
    FilterExtract(self.G, self.fname_copy,
                  start_date=u"2007-01-01",
                  end_date=u"2011-01-01",
                  update_metadata=False).create_filtered_copy()
    G_copy = GTFS(self.fname_copy)
    dsut_end = G_copy.get_day_start_ut("2010-12-31")
    dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(dsut_end, dsut_end + 24 * 3600)
    self.assertGreater(len(dsut_to_trip_I), 0)
    os.remove(self.fname_copy)
def test_get_main_database_path(self):
    self.assertEqual(self.gtfs.get_main_database_path(), "",
                     "path of an in-memory database should equal ''")

    from gtfspy.import_gtfs import import_gtfs
    # define fname before the try block so that the finally clause can
    # always reference it
    fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
    try:
        if os.path.exists(fname) and os.path.isfile(fname):
            os.remove(fname)
        conn = sqlite3.connect(fname)
        import_gtfs(self.gtfs_source_dir, conn, preserve_connection=True, print_progress=False)
        G = GTFS(conn)
        self.assertTrue(os.path.exists(G.get_main_database_path()))
        self.assertIn(u"/test_gtfs.sqlite", G.get_main_database_path(),
                      "path should be correct")
    finally:
        if os.path.exists(fname) and os.path.isfile(fname):
            os.remove(fname)
def plot_city_figs(cities=None, axes=None, save_figure=True):
    if cities is None:
        cities = sorted(ALL_CITIES)
    for i, city in enumerate(cities):
        print("Plotting " + city)
        if axes is not None:
            ax = axes[i]
        else:
            fig = plt.figure(figsize=(6., 4.))
            ax = fig.add_subplot(111)
            fig.subplots_adjust(left=0.0, right=1.0, top=1.0, bottom=0.0)
        to_publish_csv = get_to_publish_csv()
        city_data = to_publish_csv[to_publish_csv["id"] == city].iloc[0]
        feeds = get_feeds_from_to_publish_tuple(city_data)
        pipeline = ExtractPipeline(city_data, feeds)
        try:
            day_G = GTFS(pipeline.day_db_path)
            ax = plot_route_network_from_gtfs(day_G, map_style="dark_all", ax=ax)
        except FileNotFoundError:
            print("File " + pipeline.day_db_path + " was not found")
        if save_figure:
            fig_path = os.path.join(FIG_PATH_DIR, city + ".pdf")
            ax.figure.savefig(fig_path)
            print("Figure saved to: \n" + fig_path)
class GenericJourneyDataAnalysisPipeline:

    def __init__(self):
        self.G = GTFS(GTFS_DATA_BASEDIR)
        self.day_start_ut = self.G.get_suitable_date_for_daily_extract(ut=True) + 3600
        self.start_time = self.day_start_ut + 8 * 3600
        self.end_time = self.day_start_ut + 11 * 3600
        self.profiles = {}
        self.journey_analyzer = None
        # self.analysis_start_time
        # self.analysis_end_time

    def script(self):
        journey_analyzer = JourneyDataAnalyzer(JOURNEY_DATA_DIR, GTFS_DATA_BASEDIR)
        if False:  # toggle to export shapefiles
            gdf = journey_analyzer.get_transfer_stops()
            gdf.to_file(shapefile_dir('transfer_stops'), driver='ESRI Shapefile')
            gdf = journey_analyzer.get_transfer_walks()
            gdf.to_file(shapefile_dir('transfer_walks'), driver='ESRI Shapefile')
            gdf = journey_analyzer.journeys_per_section()
            gdf.to_file(shapefile_dir('journeys_per_section'), driver='ESRI Shapefile')
            gdf = journey_analyzer.journey_alternatives_per_stop()
            gdf.to_file(shapefile_dir('journeys_per_stop'), driver='ESRI Shapefile')
        journey_analyzer.n_route_alternatives()
def get_weekly_extract_start_date(self):
    """
    Returns
    -------
    datetime.datetime
    """
    print("Weekly extract start date")
    if isinstance(self.extract_start_date, str):
        assert len(self.extract_start_date) == 10
        print("Obtained from to_publish.csv")
        return datetime.datetime.strptime(self.extract_start_date, "%Y-%m-%d")
    else:
        main_G = GTFS(self.main_db_path)
        print("Automatically computed based on database")
        assert isinstance(main_G, GTFS)
        day_extract_date_start = main_G.get_weekly_extract_start_date()
        return day_extract_date_start
def test_filter_by_agency(self):
    FilterExtract(self.G, self.fname_copy, agency_ids_to_preserve=['DTA']).create_filtered_copy()
    hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
    self.assertNotEqual(self.hash_orig, hash_copy)
    G_copy = GTFS(self.fname_copy)

    agency_table = G_copy.get_table("agencies")
    assert "EXA" not in agency_table['agency_id'].values, "EXA agency should not be preserved"
    assert "DTA" in agency_table['agency_id'].values, "DTA agency should be preserved"

    routes_table = G_copy.get_table("routes")
    assert "EXR1" not in routes_table['route_id'].values, "EXR1 route_id should not be preserved"
    assert "AB" in routes_table['route_id'].values, "AB route_id should be preserved"

    trips_table = G_copy.get_table("trips")
    assert "EXT1" not in trips_table['trip_id'].values, "EXT1 trip_id should not be preserved"
    assert "AB1" in trips_table['trip_id'].values, "AB1 trip_id should be preserved"

    calendar_table = G_copy.get_table("calendar")
    assert "FULLW" in calendar_table['service_id'].values, "FULLW service_id should be preserved"

    # stop_times: 01:32:45 corresponds to 3600 + (32 * 60) + 45 [in day seconds]
    stop_times_table = G_copy.get_table("stop_times")
    assert 3600 + (32 * 60) + 45 not in stop_times_table['arr_time'].values
    os.remove(self.fname_copy)