def test_connectionscan_data_constructor_basic():
    """ConnectionScanData keeps the inputs and exposes connections sorted by departure time."""
    stops_per_id = {
        "1": Stop("1", "c1", "n1", 0.0, 0.0),
        "2": Stop("2", "c2", "n2", 1.0, 1.0),
        "2a": Stop("2a", "c2a", "n2a", 1.1, 1.1),
        "3": Stop("3", "c3", "n3", 3.0, 3.0),
    }

    footpaths_per_from_to_stop_id = {
        ("1", "1"): Footpath("1", "1", 60),
        ("2", "2"): Footpath("2", "2", 70),
        ("2a", "2a"): Footpath("2a", "2a", 71),
        ("3", "3"): Footpath("3", "3", 80),
        ("2", "2a"): Footpath("2", "2a", 75),
        ("2a", "2"): Footpath("2a", "2", 75),
    }

    con_1_1 = Connection("t1", "1", "2", 60, 70)
    con_1_2 = Connection("t1", "2", "3", 72, 80)

    con_2_1 = Connection("t2", "2", "3", 50, 59)
    con_2_2 = Connection("t2", "3", "1", 60, 72)

    trips_per_id = {
        "t1": Trip("t1", [con_1_1, con_1_2]),
        "t2": Trip("t2", [con_2_1, con_2_2])
    }
    cs_data = ConnectionScanData(stops_per_id, footpaths_per_from_to_stop_id, trips_per_id)
    assert 4 == len(cs_data.stops_per_id)
    # bugfix: the original asserted the stop count twice; the second check was
    # presumably meant to cover the 6 footpaths defined above.
    assert 6 == len(cs_data.footpaths_per_from_to_stop_id)
    assert 2 == len(cs_data.trips_per_id)
    # sorted by departure time (50, 60, 60, 72), ties broken by arrival time (70 < 72)
    assert [con_2_1, con_1_1, con_2_2, con_1_2] == cs_data.sorted_connections
def create_test_connectionscan_data():
    """Builds a small in-memory ConnectionScanData fixture with a handful of
    Swiss stops, footpaths within/between them, and several forth-and-back
    train/bus lines."""
    all_stops = [
        fribourg,
        bern,
        zuerich_hb,
        winterthur,
        st_gallen,
        interlaken_ost,
        basel_sbb,
        chur,
        thusis,
        samedan,
        st_moritz,
        bern_duebystrasse,
        koeniz_zentrum,
        bern_bahnhof,
        ostermundigen_bahnhof,
        samedan_bahnhof,
        samedan_spital,
    ]
    stops_per_id = {stop.id: stop for stop in all_stops}

    # default transfer ("loop") footpath of 2 minutes within every stop
    footpaths_per_from_stop_to_stop_id = {
        (stop.id, stop.id): Footpath(stop.id, stop.id, 2 * 60)
        for stop in all_stops
    }
    # station-specific loop times plus walking links between neighbouring stops
    footpath_overrides = [
        (zuerich_hb.id, zuerich_hb.id, 7 * 60),
        (bern.id, bern.id, 5 * 60),
        (bern_bahnhof.id, bern.id, 5 * 60),
        (bern.id, bern_bahnhof.id, 5 * 60),
        (chur.id, chur.id, 4 * 60),
        (samedan.id, samedan_bahnhof.id, 3 * 60),
        (samedan_bahnhof.id, samedan.id, 3 * 60),
    ]
    for from_stop_id, to_stop_id, walking_time in footpath_overrides:
        footpaths_per_from_stop_to_stop_id[(from_stop_id, to_stop_id)] = Footpath(
            from_stop_id, to_stop_id, walking_time)

    trips = []

    trips += get_forth_and_back_trips(
        [fribourg, bern, zuerich_hb, winterthur, st_gallen],
        [22 * 60, 56 * 60, 26 * 60, 35 * 60], [6 * 60, 9 * 60, 3 * 60],
        hhmmss_to_sec("05:34:00"), 32, 30 * 60)

    trips += get_forth_and_back_trips([interlaken_ost, bern, basel_sbb],
                                      [52 * 60, 55 * 60], [12 * 60],
                                      hhmmss_to_sec("05:00:00"), 16, 60 * 60)

    trips += get_forth_and_back_trips([basel_sbb, zuerich_hb, chur],
                                      [53 * 60, 75 * 60], [11 * 60],
                                      hhmmss_to_sec("05:33:00"), 16, 60 * 60)

    trips += get_forth_and_back_trips([chur, thusis, samedan, st_moritz],
                                      [30 * 60, 75 * 60, 12 * 60],
                                      [2 * 60, 6 * 60],
                                      hhmmss_to_sec("05:58:00"), 16, 60 * 60)

    trips += get_forth_and_back_trips([
        koeniz_zentrum, bern_duebystrasse, bern_bahnhof, ostermundigen_bahnhof
    ], [6 * 60, 7 * 60, 15 * 60], [0, 0], hhmmss_to_sec("05:00:00"), 10 * 16,
                                      6 * 60)

    trips += get_forth_and_back_trips([samedan_bahnhof, samedan_spital],
                                      [7 * 60], [], hhmmss_to_sec("15:00:00"),
                                      1, 24 * 60 * 60)

    trips_per_id = {trip.id: trip for trip in trips}
    return ConnectionScanData(stops_per_id, footpaths_per_from_stop_to_stop_id,
                              trips_per_id)
def test_connectionscan_data_constructor_stop_ids_in_trips_not_consistent_with_stops():
    """The constructor must reject trips referencing stop ids absent from the stops."""
    stops = {"s1": Stop("s1", "", "", 0.0, 0.0)}
    trip_with_unknown_stop = Trip("t", [Connection("t", "s1", "s2", 30, 40)])
    with pytest.raises(ValueError):
        ConnectionScanData(stops, {}, {"t": trip_with_unknown_stop})
    log_end(additional_message="test failed successful")
def test_connectionscan_data_constructor_trip_id_not_consistent():
    """The constructor must reject a trips dict whose key differs from the trip's id."""
    mismatched_trips = {"t1": Trip("t", [])}
    with pytest.raises(ValueError):
        ConnectionScanData({}, {}, mismatched_trips)
    log_end(additional_message="test failed successful")
def test_connectionscan_data_constructor_stops_in_footpath_and_stops_not_consistent():
    """The constructor must reject footpaths whose endpoints are not all defined stops."""
    stops = {"s1": Stop("s1", "", "", 0.0, 0.0)}
    footpath_to_unknown_stop = {("s1", "s2"): Footpath("s1", "s2", 60)}
    with pytest.raises(ValueError):
        ConnectionScanData(stops, footpath_to_unknown_stop, {})
    log_end(additional_message="test failed successful")
def parse_gtfs(
        path_to_gtfs_zip,
        desired_date,
        add_beeline_footpaths=True,
        beeline_distance=100.0,
        walking_speed=2.0 / 3.6,
        make_footpaths_transitive=False
):
    """Parses a gtfs-file and returns the corresponding timetable data of a specific date.

    In many GTFS files the information about the footpaths/transfers is not complete.
    In these cases it is recommended to define appropriate footpaths within a beeline distance.

    Args:
        path_to_gtfs_zip (str): path to the gtfs-file (weblink or path to a zip-file).
        desired_date (date): date on which the timetable data is read.
        add_beeline_footpaths (obj:`bool`, optional): specifies whether footpaths should be created
        depending on the beeline (air distance) and independent of the transfers.txt gtfs-file or not.
        beeline_distance (obj:`float`, optional): radius in meter of the perimeter (circle) to create
        the beeline footpaths (only relevant if add_beeline_footpaths is True).
        walking_speed (obj:`float`, optional): walking speed in meters per second for calculating the walking time
        of the created beeline footpaths (only relevant if add_beeline_footpaths is True).
        make_footpaths_transitive (obj:`bool`, optional): True if the footpaths are to be made transitive, else False.
        Making footpaths transitive can lead to long running times and implausible results.

    Returns:
        ConnectionScanData: timetable data of the specific date.

    Raises:
        ValueError: if transfers.txt exists but has no min_transfer_time column.
    """
    log_start("parsing gtfs-file for desired date {} ({})".format(desired_date, path_to_gtfs_zip), log)
    stops_per_id = {}
    footpaths_per_from_to_stop_id = {}
    trips_per_id = {}

    with ZipFile(path_to_gtfs_zip, "r") as zip_file:
        log_start("parsing stops.txt", log)
        with zip_file.open("stops.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            id_index = header.index("stop_id")  # required
            code_index = get_index_with_default(header, "stop_code")  # optional
            name_index = get_index_with_default(header, "stop_name")  # conditionally required
            lat_index = get_index_with_default(header, "stop_lat")  # conditionally required
            lon_index = get_index_with_default(header, "stop_lon")  # conditionally required
            location_type_index = get_index_with_default(header, "location_type")
            parent_station_index = get_index_with_default(header, "parent_station")
            for row in reader:
                stop_id = row[id_index]
                # bugfix: the optional indexes must be compared against None; a plain
                # truthiness test wrongly treats a column sitting at position 0 as missing.
                is_station = row[location_type_index] == "1" if location_type_index is not None else False
                parent_station_id = ((row[parent_station_index] if row[parent_station_index] != "" else None)
                                     if parent_station_index is not None else None)
                stops_per_id[stop_id] = Stop(
                    stop_id,
                    row[code_index] if code_index is not None else "",
                    row[name_index] if name_index is not None else "",
                    # NOTE(review): stop_lon is passed before stop_lat — confirm this
                    # matches the coordinate order expected by the Stop constructor.
                    float(row[lon_index]) if lon_index is not None else 0.0,
                    float(row[lat_index]) if lat_index is not None else 0.0,
                    is_station=is_station,
                    parent_station_id=parent_station_id
                )
        log_end(additional_message="# stops: {}".format(len(stops_per_id)))

        log_start("parsing transfers.txt", log)
        if "transfers.txt" in zip_file.namelist():
            with zip_file.open("transfers.txt", "r") as gtfs_file:  # optional
                reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
                header = next(reader)
                from_stop_id_index = header.index("from_stop_id")  # required
                to_stop_id_index = header.index("to_stop_id")  # required
                transfer_type_index = header.index("transfer_type")  # required
                min_transfer_time_index = get_index_with_default(header, "min_transfer_time")  # optional
                # bugfix: compare against None so a min_transfer_time column at
                # position 0 is not mistaken for a missing column.
                if min_transfer_time_index is not None:
                    nb_footpaths_not_added = 0
                    for row in reader:
                        # transfer_type 2 means "transfer requires a minimum amount of time"
                        if row[transfer_type_index] == "2":
                            from_stop_id = row[from_stop_id_index]
                            to_stop_id = row[to_stop_id_index]
                            if from_stop_id in stops_per_id and to_stop_id in stops_per_id:
                                footpaths_per_from_to_stop_id[(from_stop_id, to_stop_id)] = Footpath(
                                    from_stop_id,
                                    to_stop_id,
                                    int(row[min_transfer_time_index])
                                )
                            else:
                                nb_footpaths_not_added += 1
                                log.debug(("footpath from {} to {} cannot be defined since not both stops are defined "
                                           "in stops.txt").format(from_stop_id, to_stop_id))
                    if nb_footpaths_not_added > 0:
                        log.info(("{} rows from transfers.txt were not added to footpaths since either the "
                                  "from_stop_id or to_stop_id is not defined in stops.txt.").format(
                            nb_footpaths_not_added))
                else:
                    raise ValueError(("min_transfer_time column in gtfs transfers.txt file is not defined, "
                                      "cannot calculate footpaths."))
        log_end(additional_message="# footpaths from transfers.txt: {}".format(len(footpaths_per_from_to_stop_id)))
        log_start("adding footpaths to parent station", log)
        # every child stop gets a zero-time footpath from and to its parent station,
        # unless transfers.txt already defined one
        nb_parent_footpaths = 0
        for a_stop in stops_per_id.values():
            if a_stop.parent_station_id is not None:
                key = (a_stop.id, a_stop.parent_station_id)
                if key not in footpaths_per_from_to_stop_id:
                    footpaths_per_from_to_stop_id[key] = Footpath(key[0], key[1], 0)
                    nb_parent_footpaths += 1
                if (key[1], key[0]) not in footpaths_per_from_to_stop_id:
                    footpaths_per_from_to_stop_id[(key[1], key[0])] = Footpath(key[1], key[0], 0)
                    nb_parent_footpaths += 1
        log_end(additional_message="# footpath from/to parent_station added: {}. # footpaths total: {}".format(
            nb_parent_footpaths, len(footpaths_per_from_to_stop_id)))
        log_start("adding footpaths within stops (if not defined)", log)
        nb_loops = 0
        for stop_id in stops_per_id.keys():
            from_to_stop_id = (stop_id, stop_id)
            if from_to_stop_id not in footpaths_per_from_to_stop_id:
                footpaths_per_from_to_stop_id[from_to_stop_id] = Footpath(stop_id, stop_id, 0)  # best guess!!
                nb_loops += 1
        log_end(additional_message="# footpath loops added: {}, # footpaths total: {}".format(nb_loops, len(
            footpaths_per_from_to_stop_id)))

        if add_beeline_footpaths:
            create_beeline_footpaths(stops_per_id, footpaths_per_from_to_stop_id, beeline_distance, walking_speed)
        else:
            log.info("adding beeline footpaths is deactivated")

        if make_footpaths_transitive:
            make_transitive(footpaths_per_from_to_stop_id)
        else:
            log.info("making footpaths transitive is deactivated")

        log_start("parsing calendar.txt and calendar_dates.txt", log)
        service_available_at_date_per_service_id = get_service_available_at_date_per_service_id(zip_file, desired_date)
        log_end()

        log_start("parsing trips.txt", log)
        trip_available_at_date_per_trip_id, route_id_per_trip_id = \
            get_trip_available_at_date_per_trip_id(zip_file, service_available_at_date_per_service_id)
        if len(trip_available_at_date_per_trip_id):
            msg = "# trips available at {}: {}".format(desired_date, len(trip_available_at_date_per_trip_id))
        else:
            msg = "no trips available at {}. assure that the date is within the timetable period.".format(desired_date)
        log_end(additional_message=msg)

        log_start("parsing routes.txt and assigning route_type to trip_id", log)
        with zip_file.open("routes.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            route_id_index = header.index("route_id")  # required
            route_type_index = header.index("route_type")  # required
            route_type_per_route_id = {}
            for row in reader:
                route_type_per_route_id[row[route_id_index]] = int(row[route_type_index])
        route_type_per_trip_id = {trip_id: route_type_per_route_id[route_id_per_trip_id[trip_id]]
                                  for trip_id in route_id_per_trip_id}
        log_end()

        log_start("parsing stop_times.txt", log)
        with zip_file.open("stop_times.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            trip_id_index = header.index("trip_id")  # required
            stop_id_index = header.index("stop_id")  # required
            arrival_time_index = get_index_with_default(header, "arrival_time")  # conditionally required
            departure_time_index = get_index_with_default(header, "departure_time")  # conditionally required

            def process_rows_of_trip(rows):
                """Converts the consecutive stop_times rows of one trip into a Trip.

                Trips not running on the desired date, and trips with any missing
                arrival/departure time, are skipped entirely.
                """
                if rows:
                    trip_id = rows[0][trip_id_index]
                    if trip_available_at_date_per_trip_id[trip_id]:
                        connections = []
                        for i in range(len(rows) - 1):
                            from_row = rows[i]
                            to_row = rows[i + 1]
                            # bugfix: index-against-None checks (see stops.txt parsing above)
                            con_dep = from_row[departure_time_index] if departure_time_index is not None else None
                            con_arr = to_row[arrival_time_index] if arrival_time_index is not None else None
                            if con_dep and con_arr:
                                connections += [Connection(
                                    trip_id,
                                    from_row[stop_id_index],
                                    to_row[stop_id_index],
                                    hhmmss_to_sec(con_dep),
                                    hhmmss_to_sec(con_arr))]
                            else:
                                return  # we do not want trips with missing times

                        try:
                            trip_type = TripType(route_type_per_trip_id[trip_id])
                        except ValueError:
                            trip_type = TripType.UNKNOWN
                        trips_per_id[trip_id] = Trip(trip_id, connections, trip_type)

            # stop_times.txt is grouped by trip_id: collect consecutive rows of the
            # same trip and flush each group once the trip_id changes.
            last_trip_id = None
            row_list = []
            for row in reader:
                act_trip_id = row[trip_id_index]
                if last_trip_id == act_trip_id:
                    row_list += [row]
                else:
                    process_rows_of_trip(row_list)
                    last_trip_id = act_trip_id
                    row_list = [row]
            process_rows_of_trip(row_list)  # flush the final trip
        log_end(additional_message="# trips: {}".format(len(trips_per_id)))

    cs_data = ConnectionScanData(stops_per_id, footpaths_per_from_to_stop_id, trips_per_id)
    log_end(additional_message="{}".format(cs_data))
    return cs_data