def test_connectionscan_data_constructor_basic():
    """ConnectionScanData keeps stops/footpaths/trips and sorts connections by departure time."""
    stops_per_id = {
        "1": Stop("1", "c1", "n1", 0.0, 0.0),
        "2": Stop("2", "c2", "n2", 1.0, 1.0),
        "2a": Stop("2a", "c2a", "n2a", 1.1, 1.1),
        "3": Stop("3", "c3", "n3", 3.0, 3.0),
    }
    footpaths_per_from_to_stop_id = {
        ("1", "1"): Footpath("1", "1", 60),
        ("2", "2"): Footpath("2", "2", 70),
        ("2a", "2a"): Footpath("2a", "2a", 71),
        ("3", "3"): Footpath("3", "3", 80),
        ("2", "2a"): Footpath("2", "2a", 75),
        ("2a", "2"): Footpath("2a", "2", 75),
    }
    con_1_1 = Connection("t1", "1", "2", 60, 70)
    con_1_2 = Connection("t1", "2", "3", 72, 80)
    con_2_1 = Connection("t2", "2", "3", 50, 59)
    con_2_2 = Connection("t2", "3", "1", 60, 72)
    trips_per_id = {
        "t1": Trip("t1", [con_1_1, con_1_2]),
        "t2": Trip("t2", [con_2_1, con_2_2]),
    }
    cs_data = ConnectionScanData(stops_per_id, footpaths_per_from_to_stop_id, trips_per_id)
    assert 4 == len(cs_data.stops_per_id)
    # Fixed: this assertion previously duplicated the stop count; it should verify
    # that all 6 footpaths were taken over.
    assert 6 == len(cs_data.footpaths_per_from_to_stop_id)
    assert 2 == len(cs_data.trips_per_id)
    # Connections are expected sorted by departure time (con_2_1 departs at 50,
    # the two departures at 60 keep con_1_1 before con_2_2, con_1_2 departs at 72).
    assert [con_2_1, con_1_1, con_2_2, con_1_2] == cs_data.sorted_connections
def create_test_connectionscan_data():
    """Assemble a small synthetic Swiss timetable (stops, footpaths, trips) for tests.

    Returns:
        ConnectionScanData: the test timetable data.
    """
    all_test_stops = [
        fribourg, bern, zuerich_hb, winterthur, st_gallen, interlaken_ost,
        basel_sbb, chur, thusis, samedan, st_moritz, bern_duebystrasse,
        koeniz_zentrum, bern_bahnhof, ostermundigen_bahnhof, samedan_bahnhof,
        samedan_spital,
    ]
    stops_per_id = {stop.id: stop for stop in all_test_stops}

    # Default: a 2-minute loop footpath at every stop ...
    footpaths_per_from_stop_to_stop_id = {
        (stop.id, stop.id): Footpath(stop.id, stop.id, 2 * 60)
        for stop in stops_per_id.values()
    }
    # ... then station-specific loop times and a few inter-stop transfers.
    for from_stop, to_stop, walk_time in [
        (zuerich_hb, zuerich_hb, 7 * 60),
        (bern, bern, 5 * 60),
        (bern_bahnhof, bern, 5 * 60),
        (bern, bern_bahnhof, 5 * 60),
        (chur, chur, 4 * 60),
        (samedan, samedan_bahnhof, 3 * 60),
        (samedan_bahnhof, samedan, 3 * 60),
    ]:
        footpaths_per_from_stop_to_stop_id[(from_stop.id, to_stop.id)] = Footpath(
            from_stop.id, to_stop.id, walk_time)

    # Trips on several lines, each as forth-and-back services over the day.
    trips = []
    trips += get_forth_and_back_trips(
        [fribourg, bern, zuerich_hb, winterthur, st_gallen],
        [22 * 60, 56 * 60, 26 * 60, 35 * 60],
        [6 * 60, 9 * 60, 3 * 60],
        hhmmss_to_sec("05:34:00"),
        32,
        30 * 60)
    trips += get_forth_and_back_trips(
        [interlaken_ost, bern, basel_sbb],
        [52 * 60, 55 * 60],
        [12 * 60],
        hhmmss_to_sec("05:00:00"),
        16,
        60 * 60)
    trips += get_forth_and_back_trips(
        [basel_sbb, zuerich_hb, chur],
        [53 * 60, 75 * 60],
        [11 * 60],
        hhmmss_to_sec("05:33:00"),
        16,
        60 * 60)
    trips += get_forth_and_back_trips(
        [chur, thusis, samedan, st_moritz],
        [30 * 60, 75 * 60, 12 * 60],
        [2 * 60, 6 * 60],
        hhmmss_to_sec("05:58:00"),
        16,
        60 * 60)
    trips += get_forth_and_back_trips(
        [koeniz_zentrum, bern_duebystrasse, bern_bahnhof, ostermundigen_bahnhof],
        [6 * 60, 7 * 60, 15 * 60],
        [0, 0],
        hhmmss_to_sec("05:00:00"),
        10 * 16,
        6 * 60)
    trips += get_forth_and_back_trips(
        [samedan_bahnhof, samedan_spital],
        [7 * 60],
        [],
        hhmmss_to_sec("15:00:00"),
        1,
        24 * 60 * 60)

    return ConnectionScanData(
        stops_per_id,
        footpaths_per_from_stop_to_stop_id,
        {trip.id: trip for trip in trips})
def test_connectionscan_data_constructor_stop_ids_in_trips_not_consistent_with_stops():
    """A trip referencing a stop id that is missing from stops_per_id must be rejected."""
    stops = {"s1": Stop("s1", "", "", 0.0, 0.0)}
    trip_with_unknown_stop = Trip("t", [Connection("t", "s1", "s2", 30, 40)])
    with pytest.raises(ValueError):
        ConnectionScanData(stops, {}, {"t": trip_with_unknown_stop})
    log_end(additional_message="test failed successful")
def test_connectionscan_data_constructor_trip_id_not_consistent():
    """A trip stored under a key that differs from its own id must be rejected."""
    mismatched_trips = {"t1": Trip("t", [])}
    with pytest.raises(ValueError):
        ConnectionScanData({}, {}, mismatched_trips)
    log_end(additional_message="test failed successful")
def test_connectionscan_data_constructor_stops_in_footpath_and_stops_not_consistent():
    """A footpath referencing a stop id that is missing from stops_per_id must be rejected."""
    stops = {"s1": Stop("s1", "", "", 0.0, 0.0)}
    footpath_with_unknown_stop = {("s1", "s2"): Footpath("s1", "s2", 60)}
    with pytest.raises(ValueError):
        ConnectionScanData(stops, footpath_with_unknown_stop, {})
    log_end(additional_message="test failed successful")
def parse_gtfs(
        path_to_gtfs_zip,
        desired_date,
        add_beeline_footpaths=True,
        beeline_distance=100.0,
        walking_speed=2.0 / 3.6,
        make_footpaths_transitive=False
):
    """Parses a gtfs-file and returns the corresponding timetable data of a specific date.

    In many GTFS files the information about the footpaths/transfers is not complete.
    In these cases it is recommended to define appropriate footpaths within a beeline distance.

    Args:
        path_to_gtfs_zip (str): path to the gtfs-file (weblink or path to a zip-file).
        desired_date (date): date on which the timetable data is read.
        add_beeline_footpaths (obj:`bool`, optional): specifies whether footpaths should be created
            depending on the beeline (air distance) and independent of the transfers.txt gtfs-file or not.
        beeline_distance (obj:`float`, optional): radius in meter of the perimeter (circle) to create
            the beeline footpaths (only relevant if add_beeline_footpaths is True).
        walking_speed (obj:`float`, optional): walking speed in meters per second for calculating the
            walking time of the created beeline footpaths (only relevant if add_beeline_footpaths is True).
        make_footpaths_transitive (obj:`bool`, optional): True if the footpaths are to be made transitive,
            else False. Making footpaths transitive can lead to long running times and implausible results.

    Returns:
        ConnectionScanData: timetable data of the specific date.
    """
    log_start("parsing gtfs-file for desired date {} ({})".format(desired_date, path_to_gtfs_zip), log)
    stops_per_id = {}
    footpaths_per_from_to_stop_id = {}
    trips_per_id = {}
    with ZipFile(path_to_gtfs_zip, "r") as zip_file:
        # --- stops.txt: one Stop per row ---
        log_start("parsing stops.txt", log)
        with zip_file.open("stops.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            id_index = header.index("stop_id")  # required
            code_index = get_index_with_default(header, "stop_code")  # optional
            name_index = get_index_with_default(header, "stop_name")  # conditionally required
            lat_index = get_index_with_default(header, "stop_lat")  # conditionally required
            lon_index = get_index_with_default(header, "stop_lon")  # conditionally required
            location_type_index = get_index_with_default(header, "location_type")
            parent_station_index = get_index_with_default(header, "parent_station")
            # NOTE(review): the truthiness checks below treat a column at index 0 as
            # absent — confirm these optional columns can never be the first header field.
            for row in reader:
                stop_id = row[id_index]
                # location_type "1" marks a station (parent of platforms/stops).
                is_station = row[location_type_index] == "1" if location_type_index else False
                # empty parent_station cells are normalized to None
                parent_station_id = ((row[parent_station_index] if row[parent_station_index] != "" else None)
                                     if parent_station_index else None)
                stops_per_id[stop_id] = Stop(
                    stop_id,
                    row[code_index] if code_index else "",
                    row[name_index] if name_index else "",
                    float(row[lon_index]) if lon_index else 0.0,
                    float(row[lat_index]) if lat_index else 0.0,
                    is_station=is_station,
                    parent_station_id=parent_station_id
                )
        log_end(additional_message="# stops: {}".format(len(stops_per_id)))
        # --- transfers.txt (optional): minimum-time transfers become Footpaths ---
        log_start("parsing transfers.txt", log)
        if "transfers.txt" in zip_file.namelist():
            with zip_file.open("transfers.txt", "r") as gtfs_file:  # optional
                reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
                header = next(reader)
                from_stop_id_index = header.index("from_stop_id")  # required
                to_stop_id_index = header.index("to_stop_id")  # required
                transfer_type_index = header.index("transfer_type")  # required
                min_transfer_time_index = get_index_with_default(header, "min_transfer_time")  # optional
                if min_transfer_time_index:
                    nb_footpaths_not_added = 0
                    for row in reader:
                        # transfer_type "2" = transfer requiring a minimum time (GTFS spec)
                        if row[transfer_type_index] == "2":
                            from_stop_id = row[from_stop_id_index]
                            to_stop_id = row[to_stop_id_index]
                            # only keep footpaths whose endpoints exist in stops.txt
                            if from_stop_id in stops_per_id and to_stop_id in stops_per_id:
                                footpaths_per_from_to_stop_id[(from_stop_id, to_stop_id)] = Footpath(
                                    from_stop_id, to_stop_id, int(row[min_transfer_time_index])
                                )
                            else:
                                nb_footpaths_not_added += 1
                                log.debug(("footpath from {} to {} cannot be defined since not both stops are defined "
                                           "in stops.txt").format(from_stop_id, to_stop_id))
                    if nb_footpaths_not_added > 0:
                        log.info(("{} rows from transfers.txt were not added to footpaths since either the "
                                  "from_stop_id or to_stop_id is not defined in stops.txt.").format(
                            nb_footpaths_not_added))
                else:
                    # without min_transfer_time we cannot derive walking durations
                    raise ValueError(("min_transfer_time column in gtfs transfers.txt file is not defined, "
                                      "cannot calculate footpaths."))
        log_end(additional_message="# footpaths from transfers.txt: {}".format(len(footpaths_per_from_to_stop_id)))
        # --- zero-duration footpaths between child stops and their parent station ---
        log_start("adding footpaths to parent station", log)
        nb_parent_footpaths = 0
        for a_stop in stops_per_id.values():
            if a_stop.parent_station_id is not None:
                key = (a_stop.id, a_stop.parent_station_id)
                # add both directions, but never overwrite explicit transfers
                if key not in footpaths_per_from_to_stop_id:
                    footpaths_per_from_to_stop_id[key] = Footpath(key[0], key[1], 0)
                    nb_parent_footpaths += 1
                if (key[1], key[0]) not in footpaths_per_from_to_stop_id:
                    footpaths_per_from_to_stop_id[(key[1], key[0])] = Footpath(key[1], key[0], 0)
                    nb_parent_footpaths += 1
        log_end(additional_message="# footpath from/to parent_station added: {}. # footpaths total: {}".format(
            nb_parent_footpaths, len(footpaths_per_from_to_stop_id)))
        # --- ensure every stop has a loop footpath (stop -> same stop) ---
        log_start("adding footpaths within stops (if not defined)", log)
        nb_loops = 0
        for stop_id in stops_per_id.keys():
            from_to_stop_id = (stop_id, stop_id)
            if from_to_stop_id not in footpaths_per_from_to_stop_id:
                footpaths_per_from_to_stop_id[from_to_stop_id] = Footpath(stop_id, stop_id, 0)  # best guess!!
                nb_loops += 1
        log_end(additional_message="# footpath loops added: {}, # footpaths total: {}".format(nb_loops, len(
            footpaths_per_from_to_stop_id)))
        # --- optional footpath post-processing ---
        if add_beeline_footpaths:
            create_beeline_footpaths(stops_per_id, footpaths_per_from_to_stop_id, beeline_distance, walking_speed)
        else:
            log.info("adding beeline footpaths is deactivated")
        if make_footpaths_transitive:
            make_transitive(footpaths_per_from_to_stop_id)
        else:
            log.info("making footpaths transitive is deactivated")
        # --- service calendar: which services run on desired_date ---
        log_start("parsing calendar.txt and calendar_dates.txt", log)
        service_available_at_date_per_service_id = get_service_available_at_date_per_service_id(zip_file, desired_date)
        log_end()
        # --- trips.txt: availability per trip and route assignment ---
        log_start("parsing trips.txt", log)
        trip_available_at_date_per_trip_id, route_id_per_trip_id = \
            get_trip_available_at_date_per_trip_id(zip_file, service_available_at_date_per_service_id)
        if len(trip_available_at_date_per_trip_id):
            msg = "# trips available at {}: {}".format(desired_date, len(trip_available_at_date_per_trip_id))
        else:
            # NOTE(review): this message was split across physical lines in the original
            # source; reconstructed as an implicit concatenation — confirm exact wording.
            msg = ("no trips available at {}. "
                   "assure that the date is within the timetable period.").format(desired_date)
        log_end(additional_message=msg)
        # --- routes.txt: map each trip to its route_type ---
        log_start("parsing routes.txt and assigning route_type to trip_id", log)
        with zip_file.open("routes.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            route_id_index = header.index("route_id")  # required
            route_type_index = header.index("route_type")  # required
            route_type_per_route_id = {}
            for row in reader:
                route_type_per_route_id[row[route_id_index]] = int(row[route_type_index])
            route_type_per_trip_id = {trip_id: route_type_per_route_id[route_id_per_trip_id[trip_id]]
                                      for trip_id in route_id_per_trip_id}
        log_end()
        # --- stop_times.txt: build Connections per trip, in file order ---
        log_start("parsing stop_times.txt", log)
        with zip_file.open("stop_times.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            trip_id_index = header.index("trip_id")  # required
            stop_id_index = header.index("stop_id")  # required
            arrival_time_index = get_index_with_default(header, "arrival_time")  # conditionally required
            departure_time_index = get_index_with_default(header, "departure_time")  # conditionally required

            def process_rows_of_trip(rows):
                # Turn the consecutive stop_times rows of one trip into a Trip with
                # one Connection per stop pair; skipped entirely if the trip does not
                # run on desired_date or any of its times is missing.
                if rows:
                    trip_id = rows[0][trip_id_index]
                    if trip_available_at_date_per_trip_id[trip_id]:
                        connections = []
                        for i in range(len(rows) - 1):
                            from_row = rows[i]
                            to_row = rows[i + 1]
                            con_dep = from_row[departure_time_index] if departure_time_index else None
                            con_arr = to_row[arrival_time_index] if arrival_time_index else None
                            if con_dep and con_arr:
                                connections += [Connection(
                                    trip_id,
                                    from_row[stop_id_index],
                                    to_row[stop_id_index],
                                    hhmmss_to_sec(con_dep),
                                    hhmmss_to_sec(con_arr))]
                            else:
                                return  # we do not want trips with missing times
                        try:
                            trip_type = TripType(route_type_per_trip_id[trip_id])
                        except ValueError:
                            # route_type not covered by the TripType enum
                            trip_type = TripType.UNKNOWN
                        trips_per_id[trip_id] = Trip(trip_id, connections, trip_type)

            # group consecutive rows per trip_id (assumes stop_times.txt is grouped by trip)
            last_trip_id = None
            row_list = []
            for row in reader:
                act_trip_id = row[trip_id_index]
                if last_trip_id == act_trip_id:
                    row_list += [row]
                else:
                    process_rows_of_trip(row_list)
                    last_trip_id = act_trip_id
                    row_list = [row]
            process_rows_of_trip(row_list)  # flush the last trip
        log_end(additional_message="# trips: {}".format(len(trips_per_id)))
    cs_data = ConnectionScanData(stops_per_id, footpaths_per_from_to_stop_id, trips_per_id)
    log_end(additional_message="{}".format(cs_data))
    return cs_data