コード例 #1
0
def make_transitive(footpaths_per_from_to_stop_id):
    """Repeats the transitivity check and applies its results until nothing is reported anymore.

    In every round the footpaths returned by check_for_transitivity (missing ones and ones with a
    shorter walking time) are written into footpaths_per_from_to_stop_id under their
    (from_stop_id, to_stop_id) key. The dict is modified in place; nothing is returned.

    The logged counters sum up the lengths of the lists returned by check_for_transitivity.

    Args:
        footpaths_per_from_to_stop_id (dict): footpath per (from_stop_id, to_stop_id)-tuple.
    """
    log_start("making footpaths transitive", log)
    iteration_msg = "iteration {}: # footpaths added: {}, # footpaths with changed walking time: {}"
    summary_msg = "# iterations: {}, # footpaths added: {}, # footpaths with changed time: {}, # footpaths total: {}"
    rounds = 0
    total_added = 0
    total_retimed = 0
    while True:
        missing, retimed = check_for_transitivity(footpaths_per_from_to_stop_id)
        if not missing and not retimed:
            # fixed point reached: the check found nothing to add or to shorten
            break
        # missing footpaths are written first, then the ones with a changed walking time
        for batch in (missing, retimed):
            for footpath in batch:
                key = (footpath.from_stop_id, footpath.to_stop_id)
                footpaths_per_from_to_stop_id[key] = footpath
        log.info(iteration_msg.format(rounds, len(missing), len(retimed)))
        rounds += 1
        total_added += len(missing)
        total_retimed += len(retimed)
    log_end(additional_message=summary_msg.format(
        rounds, total_added, total_retimed, len(footpaths_per_from_to_stop_id)))
コード例 #2
0
 def __init__(self, connection_scan_data):
     """Keeps a reference to the timetable data and groups its footpaths by origin stop.

     Args:
         connection_scan_data: object providing footpaths_per_from_to_stop_id, a dict whose values
             are footpaths with a from_stop_id attribute.
     """
     # NOTE(review): the log text names ConnectionScanData although the comment below refers to
     # ConnectionScanCore -- looks like a copy/paste leftover, confirm before changing.
     log_start("creating ConnectionScanData", log)
     # static per ConnectionScanCore
     seconds_per_day = 24 * 60 * 60
     self.MAX_ARR_TIME_VALUE = 2 * seconds_per_day  # we assume that arrival times are always within two days
     self.connection_scan_data = connection_scan_data
     self.outgoing_footpaths_per_stop_id = defaultdict(list)
     outgoing = self.outgoing_footpaths_per_stop_id
     for footpath in connection_scan_data.footpaths_per_from_to_stop_id.values():
         outgoing[footpath.from_stop_id].append(footpath)
     log_end()
コード例 #3
0
def check_for_transitivity(footpaths_per_from_to_stop_id):
    """Checks the footpaths for transitivity
    and returns missing footpaths and modified footpaths violating the triangle inequality.

    More precisely:
    - if there are three stops s_1, s_2 and s_3 with a footpath from s_1 to s_2 and a footpath from s_2 to s_3,
    but no footpath from s_1 to s_3, a new footpath from s_1 to s_3 is created (with walking time equal to the sum
    of the walking times of the two existing footpaths).
    This new footpath is added to the list returned in the first entry of the returned tuple.
    - if there are three stops s_1, s_2 and s_3 with a footpath from s_1 to s_2, s_2 to s_3 and s_1 to s_3, but the
    sum of the walking times of s_1 to s_2 and s_2 to s_3 is smaller than the walking time from s_1 to s_3,
    a new footpath from s_1 to s_3 is created with walking time equal to the sum of the two other walking times
    (only if this sum is positive).
    This new footpath is added to the list returned in the second entry of the returned tuple.

    The passed dict is not modified. Every (first, second) footpath combination is reported on its own,
    so the same (from_stop_id, to_stop_id) pair can occur several times in a returned list.

    Args:
        footpaths_per_from_to_stop_id (dict): footpath per (from_stop_id, to_stop_id)-tuple.

    Returns:
        tuple: new and modified footpaths (see above for the details).
    """
    log_start("checking footpaths for transitivity", log)
    # Group the footpaths by the origin stop id of their dict key. Appending in place avoids
    # copying the per-stop list on every insert.
    footpaths_per_from_stop_id = {}
    for (from_stop_id, _), footpath in footpaths_per_from_to_stop_id.items():
        footpaths_per_from_stop_id.setdefault(from_stop_id, []).append(footpath)

    new_footpaths = []
    footpaths_with_time_change = []
    for from_stop_id, outgoing_footpaths in footpaths_per_from_stop_id.items():
        for first_footpath in outgoing_footpaths:
            # all footpaths that continue from the end of the first footpath
            for second_footpath in footpaths_per_from_stop_id.get(first_footpath.to_stop_id, ()):
                from_to = (from_stop_id, second_footpath.to_stop_id)
                new_wt = first_footpath.walking_time + second_footpath.walking_time
                if from_to not in footpaths_per_from_to_stop_id:
                    new_footpaths.append(Footpath(from_to[0], from_to[1], new_wt))
                elif 0 < new_wt < footpaths_per_from_to_stop_id[from_to].walking_time:
                    # the detour is strictly faster than the direct footpath; a sum of 0 is ignored
                    footpaths_with_time_change.append(Footpath(from_to[0], from_to[1], new_wt))
    log_end()
    return new_footpaths, footpaths_with_time_change
コード例 #4
0
    def route_earliest_arrival_with_reconstruction(self, from_stop_id,
                                                   to_stop_id,
                                                   desired_dep_time):
        """Entry point for the unoptimized earliest arrival routing with journey reconstruction
        (figure 6 of https://arxiv.org/pdf/1703.05997.pdf).

        Note:
            - The routing itself is not implemented yet (task 2, see the TODO below):
            the method currently logs the request and returns a freshly created Journey().
            - Both stop ids are looked up in self.connection_scan_data.stops_per_id for the log message.

        Args:
            from_stop_id (str): id of the source stop.
            to_stop_id (str): id of the target stop.
            desired_dep_time (int): desired departure time in seconds after midnight.

        Returns:
            Journey: the resulting journey (currently always a new Journey()).
        """
        stops_per_id = self.connection_scan_data.stops_per_id
        start_message = "unoptimized earliest arrival routing with journey reconstruction from {} to {} at {}".format(
            stops_per_id[from_stop_id].name,
            stops_per_id[to_stop_id].name,
            seconds_to_hhmmss(desired_dep_time))
        log_start(start_message, log)

        # TODO implement task 2 here
        # Some hints:
        # - Implement task 1 first.
        # - In order to correctly model the footpaths at the start and end of the journey,
        #   the algorithm from the pseudo code must be slightly modified.
        # - You could use the following dynamic data structures:
        #    - a dict for the earliest arrival including transfer/walking times per stop.
        #    - an int for the earliest arrival time at the target stop.
        #    - a dict for the in/boarding connection per trip.
        #    - a JourneyLeg for the last journey leg at the target stop.
        # - Construct the resulting Journey with the reconstruction logic from the pseudo code.

        res = Journey()
        if res is None:
            nb_journey_legs = 0
        else:
            nb_journey_legs = res.get_nb_journey_legs()
        log_end(additional_message="# journey legs: {}".format(nb_journey_legs))
        return res
コード例 #5
0
    def route_optimized_earliest_arrival_with_reconstruction(
            self, from_stop_id, to_stop_id, desired_dep_time):
        """Entry point for the optimized earliest arrival routing with journey reconstruction
        (figure 4 and 6 of https://arxiv.org/pdf/1703.05997.pdf).

        Note:
            - The routing itself is not implemented yet (task 3, see the TODO below):
            the method currently logs the request and returns a freshly created Journey().
            - Both stop ids are looked up in self.connection_scan_data.stops_per_id for the log message.

        Args:
            from_stop_id (str): id of the source stop.
            to_stop_id (str): id of the target stop.
            desired_dep_time (int): desired departure time in seconds after midnight.

        Returns:
            Journey: the resulting journey (currently always a new Journey()).
        """
        stops_per_id = self.connection_scan_data.stops_per_id
        start_message = "optimized earliest arrival routing with journey reconstruction from {} to {} at {}".format(
            stops_per_id[from_stop_id].name,
            stops_per_id[to_stop_id].name,
            seconds_to_hhmmss(desired_dep_time))
        log_start(start_message, log)

        # TODO implement task 3 here
        # Some hints:
        # - Implement task 2 first.
        # - In order to correctly model the footpaths at the start and end of the journey,
        #   the algorithm from the pseudo code must be slightly modified.
        # - You could use the same dynamic data structures and reconstruction logic as in task 2.
        # - Implement the three optimization criteria described in the paper (on page 8):
        #   stopping criterion, starting criterion and limited walking.
        #   For the starting criterion you can use the function binary_search from the funs module.

        res = Journey()
        if res is None:
            nb_journey_legs = 0
        else:
            nb_journey_legs = res.get_nb_journey_legs()
        log_end(additional_message="# journey legs: {}".format(nb_journey_legs))
        return res
コード例 #6
0
    def route_earliest_arrival(self, from_stop_id, to_stop_id,
                               desired_dep_time):
        """Entry point for the unoptimized earliest arrival routing
        (figure 3 of https://arxiv.org/pdf/1703.05997.pdf).

        Note:
            - The routing itself is not implemented yet (task 1, see the TODO below):
            the method currently logs the request and returns -1.
            - Both stop ids are looked up in self.connection_scan_data.stops_per_id for the log message.

        Args:
            from_stop_id (str): id of the source stop.
            to_stop_id (str): id of the target stop.
            desired_dep_time (int): desired departure time in seconds after midnight.

        Returns:
            int: earliest possible arrival time at the target stop (currently always -1).
        """
        stops_per_id = self.connection_scan_data.stops_per_id
        start_message = "unoptimized earliest arrival routing from {} to {} at {}".format(
            stops_per_id[from_stop_id].name,
            stops_per_id[to_stop_id].name,
            seconds_to_hhmmss(desired_dep_time))
        log_start(start_message, log)

        # TODO implement task 1 here
        # Some hints:
        # - First get familiar with the data structures from the classes module and the ConnectionScanData class
        # - In order to correctly model the footpaths at the start and end of the journey,
        #   the algorithm from the pseudo code must be slightly modified.
        # - You could use the following dynamic data structures:
        #    - a dict for the earliest arrival including transfer/walking times per stop.
        #    - an int for the earliest arrival time at the target stop.
        #    - a dict to mark if a trip is set or not.

        res = -1
        # NOTE(review): the truthiness test means a result of 0 (or None) is logged unformatted -- confirm intent.
        if res:
            arrival_for_log = seconds_to_hhmmss(res)
        else:
            arrival_for_log = res
        log_end(additional_message="earliest arrival time: {}".format(arrival_for_log))
        return res
コード例 #7
0
def create_beeline_footpaths(stops_per_id, footpaths_per_from_to_stop_id, beeline_distance, walking_speed):
    """Adds footpaths between all stops lying within the beeline distance of each other.

    For every stop the stops inside the perimeter are searched with a KD-tree built on the transformed
    coordinates. For each hit a footpath is inserted into footpaths_per_from_to_stop_id in both
    directions, but only for (from_stop_id, to_stop_id) keys that are not defined yet.
    The walking time of a new footpath is the distance of the transformed coordinates divided by
    the walking speed. The dict is modified in place; nothing is returned.

    Args:
        stops_per_id (dict): stops per stop id.
        footpaths_per_from_to_stop_id (dict): footpaths per (from_stop_id, to_stop_id) tuple.
        beeline_distance (float): the beeline distance in meters.
        walking_speed (float): walking speed in meters per second.
    """
    nb_footpaths_perimeter = 0
    log_start("adding footpaths in beeline perimeter with radius {}m".format(beeline_distance), log)

    log_start("transforming coordinates", log)
    stops = list(stops_per_id.values())
    projected = [wgs84_to_spherical_mercator(stop.easting, stop.northing) for stop in stops]
    log_end()

    log_start("creating quadtree for fast perimeter search", log)
    kd_tree = spatial.KDTree(projected)
    log_end()

    log_start("perimeter search around every stop", log)
    for origin_stop, origin_xy in zip(stops, projected):
        for neighbour_index in kd_tree.query_ball_point(origin_xy, beeline_distance):
            # distance in meters.
            # note that distances can be very inaccurate (up to factor 2 on a lat of 60°), but this should be ok here.
            walking_time = distance(origin_xy, projected[neighbour_index]) / walking_speed
            neighbour_id = stops[neighbour_index].id
            # forward direction first, then the reverse one; existing footpaths are never overwritten
            for from_to in ((origin_stop.id, neighbour_id), (neighbour_id, origin_stop.id)):
                if from_to in footpaths_per_from_to_stop_id:
                    continue
                footpaths_per_from_to_stop_id[from_to] = Footpath(from_to[0], from_to[1], walking_time)
                nb_footpaths_perimeter += 1
    log_end()

    summary = "# footpath within perimeter added: {}. # footpaths total: {}".format(
        nb_footpaths_perimeter,
        len(footpaths_per_from_to_stop_id)
    )
    log_end(additional_message=summary)
コード例 #8
0
    def __init__(self, stops_per_id, footpaths_per_from_to_stop_id,
                 trips_per_id):
        """Validates the passed timetable data, stores it and derives lookup structures.

        Stored attributes: stops_per_id, stops_per_name (one stop per name, stations preferred, then
        the shortest id), footpaths_per_from_to_stop_id, trips_per_id and sorted_connections
        (all connections of all trips ordered by departure and arrival time).
        Footpaths that are not transitive only lead to a logged warning.

        Args:
            stops_per_id (dict): stop per stop id.
            footpaths_per_from_to_stop_id (dict): footpath per (from_stop_id, to_stop_id)-tuple.
            trips_per_id (dict): trip per trip id.

        Raises:
            ValueError: if a dict key does not match the id(s) of its value or if footpaths or trips
                reference stop ids that are missing in stops_per_id.
        """
        log_start("creating ConnectionScanData", log)

        # stops
        for stop_id, stop in stops_per_id.items():
            if stop_id != stop.id:
                raise ValueError(
                    "id in dict ({}) does not equal id in Stop {}".format(stop_id, stop))
        self.stops_per_id = stops_per_id

        stops_grouped_by_name = defaultdict(list)
        for stop in self.stops_per_id.values():
            stops_grouped_by_name[stop.name].append(stop)

        def stop_rank(stop):
            """Sort key for the best fitting stop per name: stations first, then the shortest id."""
            return 0 if stop.is_station else 1, len(stop.id)

        # min() yields the first stop with the lowest rank
        self.stops_per_name = {
            name: min(candidates, key=stop_rank)
            for name, candidates in stops_grouped_by_name.items()
        }

        # footpaths
        for (from_stop_id, to_stop_id), footpath in footpaths_per_from_to_stop_id.items():
            if from_stop_id != footpath.from_stop_id:
                raise ValueError(
                    "from_stop_id {} in dict does not equal from_stop_id in footpath {}".format(
                        from_stop_id, footpath))
            if to_stop_id != footpath.to_stop_id:
                raise ValueError(
                    "to_stop_id {} in dict does not equal to_stop_id in footpath {}".format(
                        to_stop_id, footpath))

        origin_ids = {from_to[0] for from_to in footpaths_per_from_to_stop_id}
        destination_ids = {from_to[1] for from_to in footpaths_per_from_to_stop_id}
        stop_ids_in_footpaths_not_in_stops = (origin_ids | destination_ids) - set(stops_per_id)
        if stop_ids_in_footpaths_not_in_stops:
            raise ValueError((
                "there are stop_ids in footpaths_per_from_to_stop_id which do not occur as stop_id in "
                "stops_per_id: {}").format(stop_ids_in_footpaths_not_in_stops))

        self.footpaths_per_from_to_stop_id = footpaths_per_from_to_stop_id

        # missing transitivity is reported, but not treated as an error
        missing, violating = check_for_transitivity(self.footpaths_per_from_to_stop_id)
        if missing or violating:
            msg_str = "footpaths are not transitive: there are {} missing footpaths and {} footpaths" \
                      " violating the triangle inequality"
            log.warning(msg_str.format(len(missing), len(violating)))

        # trips
        for trip_id, trip in trips_per_id.items():
            if trip_id != trip.id:
                raise ValueError(
                    "id in dict ({}) does not equal id in Trip {}".format(trip_id, trip))

        stop_ids_in_trips = {
            stop_id
            for trip in trips_per_id.values()
            for stop_id in trip.get_set_of_all_stop_ids()
        }
        stop_ids_in_trips_not_in_stops = stop_ids_in_trips - set(stops_per_id)
        if stop_ids_in_trips_not_in_stops:
            raise ValueError(
                "there are stop_ids in trips_per_id which do not occur as stop_id in stops_per_id: {}".format(
                    stop_ids_in_trips_not_in_stops))
        self.trips_per_id = trips_per_id

        # connections of all trips, ordered by departure time and then arrival time
        all_connections = []
        for trip in trips_per_id.values():
            all_connections.extend(trip.connections)
        all_connections.sort(key=lambda con: (con.dep_time, con.arr_time))
        self.sorted_connections = all_connections
        log_end()
コード例 #9
0
def test_connectionscan_data_constructor_stop_ids_in_trips_not_consistent_with_stops():
    """A trip using stop "s2", which is not contained in the stops, must raise a ValueError."""
    with pytest.raises(ValueError):
        # everything is built inside the context so that the same exceptions as before are covered
        stops_per_id = {"s1": Stop("s1", "", "", 0.0, 0.0)}
        no_footpaths = {}
        connection_to_unknown_stop = Connection("t", "s1", "s2", 30, 40)
        trips_per_id = {"t": Trip("t", [connection_to_unknown_stop])}
        ConnectionScanData(stops_per_id, no_footpaths, trips_per_id)
    log_end(additional_message="test failed successful")
コード例 #10
0
def test_connectionscan_data_constructor_trip_id_not_consistent():
    """A trip stored under key "t1" but carrying the id "t" must raise a ValueError."""
    with pytest.raises(ValueError):
        # everything is built inside the context so that the same exceptions as before are covered
        no_stops = {}
        no_footpaths = {}
        trips_per_id = {"t1": Trip("t", [])}
        ConnectionScanData(no_stops, no_footpaths, trips_per_id)
    log_end(additional_message="test failed successful")
コード例 #11
0
def test_connectionscan_data_constructor_stops_in_footpath_and_stops_not_consistent():
    """A footpath leading to stop "s2", which is not contained in the stops, must raise a ValueError."""
    with pytest.raises(ValueError):
        # everything is built inside the context so that the same exceptions as before are covered
        stops_per_id = {"s1": Stop("s1", "", "", 0.0, 0.0)}
        footpaths_per_from_to_stop_id = {("s1", "s2"): Footpath("s1", "s2", 60)}
        no_trips = {}
        ConnectionScanData(stops_per_id, footpaths_per_from_to_stop_id, no_trips)
    log_end(additional_message="test failed successful")
コード例 #12
0
def parse_gtfs(
        path_to_gtfs_zip,
        desired_date,
        add_beeline_footpaths=True,
        beeline_distance=100.0,
        walking_speed=2.0 / 3.6,
        make_footpaths_transitive=False
):
    """Parses a gtfs-file and returns the corresponding timetable data of a specific date.

    In many GTFS files the information about the footpaths/transfers is not complete.
    In these cases it is recommended to define appropriate footpaths within a beeline distance.

    Processing order: stops.txt, transfers.txt (only rows with transfer_type "2"), footpaths from/to
    parent stations, footpath loops within every stop, optional beeline footpaths, optional transitive
    closure of the footpaths, calendar/trips/routes and finally stop_times.txt.
    Rows of stop_times.txt are grouped into a trip only while they follow each other directly in the file;
    trips with a missing departure or arrival time are skipped.

    Args:
        path_to_gtfs_zip (str): path to the gtfs zip-file (handed over to ZipFile).
        desired_date (date): date on which the timetable data is read.
        add_beeline_footpaths (obj:`bool`, optional): specifies whether footpaths should be created
        depending on the beeline (air distance) and independent of the transfers.txt gtfs-file or not.
        beeline_distance (obj:`float`, optional): radius in meter of the perimeter (circle) to create
        the beeline footpaths (only relevant if add_beeline_footpaths is True).
        walking_speed (obj:`float`, optional): walking speed in meters per second for calculating the walking time
        of the created beeline footpaths (only relevant if add_beeline_footpaths is True).
        make_footpaths_transitive (obj:`bool`, optional): True if the footpaths are to be made transitive, else False.
        Making footpaths transitive can lead to long running times and implausible results.

    Returns:
        ConnectionScanData: timetable data of the specific date.

    Raises:
        ValueError: if transfers.txt exists but has no min_transfer_time column
            (header.index additionally raises a ValueError for missing required columns).
    """
    log_start("parsing gtfs-file for desired date {} ({})".format(desired_date, path_to_gtfs_zip), log)
    stops_per_id = {}
    footpaths_per_from_to_stop_id = {}
    trips_per_id = {}

    # NOTE(review): optional column indices are compared with `is not None` so that a column at
    # position 0 counts as present. This assumes get_index_with_default returns None for a missing
    # column -- confirm against its definition.
    with ZipFile(path_to_gtfs_zip, "r") as zip_file:
        log_start("parsing stops.txt", log)
        with zip_file.open("stops.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            id_index = header.index("stop_id")  # required
            code_index = get_index_with_default(header, "stop_code")  # optional
            name_index = get_index_with_default(header, "stop_name")  # conditionally required
            lat_index = get_index_with_default(header, "stop_lat")  # conditionally required
            lon_index = get_index_with_default(header, "stop_lon")  # conditionally required
            location_type_index = get_index_with_default(header, "location_type")
            parent_station_index = get_index_with_default(header, "parent_station")
            for row in reader:
                stop_id = row[id_index]
                is_station = location_type_index is not None and row[location_type_index] == "1"
                # an empty parent_station cell means "no parent station"
                parent_station_id = None
                if parent_station_index is not None and row[parent_station_index] != "":
                    parent_station_id = row[parent_station_index]
                stops_per_id[stop_id] = Stop(
                    stop_id,
                    row[code_index] if code_index is not None else "",
                    row[name_index] if name_index is not None else "",
                    float(row[lon_index]) if lon_index is not None else 0.0,
                    float(row[lat_index]) if lat_index is not None else 0.0,
                    is_station=is_station,
                    parent_station_id=parent_station_id
                )
        log_end(additional_message="# stops: {}".format(len(stops_per_id)))

        log_start("parsing transfers.txt", log)
        if "transfers.txt" in zip_file.namelist():
            with zip_file.open("transfers.txt", "r") as gtfs_file:  # optional
                reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
                header = next(reader)
                from_stop_id_index = header.index("from_stop_id")  # required
                to_stop_id_index = header.index("to_stop_id")  # required
                transfer_type_index = header.index("transfer_type")  # required
                min_transfer_time_index = get_index_with_default(header, "min_transfer_time")  # optional
                if min_transfer_time_index is None:
                    raise ValueError(("min_transfer_time column in gtfs transfers.txt file is not defined, "
                                      "cannot calculate footpaths."))
                nb_footpaths_not_added = 0
                for row in reader:
                    if row[transfer_type_index] != "2":
                        # only rows with transfer_type "2" are turned into footpaths
                        continue
                    from_stop_id = row[from_stop_id_index]
                    to_stop_id = row[to_stop_id_index]
                    if from_stop_id in stops_per_id and to_stop_id in stops_per_id:
                        footpaths_per_from_to_stop_id[(from_stop_id, to_stop_id)] = Footpath(
                            from_stop_id,
                            to_stop_id,
                            int(row[min_transfer_time_index])
                        )
                    else:
                        nb_footpaths_not_added += 1
                        log.debug(("footpath from {} to {} cannot be defined since not both stops are defined "
                                   "in stops.txt").format(from_stop_id, to_stop_id))
                if nb_footpaths_not_added > 0:
                    log.info(("{} rows from transfers.txt were not added to footpaths since either the "
                              "from_stop_id or to_stop_id is not defined in stops.txt.").format(
                        nb_footpaths_not_added))
        log_end(additional_message="# footpaths from transfers.txt: {}".format(len(footpaths_per_from_to_stop_id)))

        log_start("adding footpaths to parent station", log)
        nb_parent_footpaths = 0
        for a_stop in stops_per_id.values():
            if a_stop.parent_station_id is None:
                continue
            # zero-time footpath in both directions, unless already defined
            for key in ((a_stop.id, a_stop.parent_station_id), (a_stop.parent_station_id, a_stop.id)):
                if key not in footpaths_per_from_to_stop_id:
                    footpaths_per_from_to_stop_id[key] = Footpath(key[0], key[1], 0)
                    nb_parent_footpaths += 1
        log_end(additional_message="# footpath from/to parent_station added: {}. # footpaths total: {}".format(
            nb_parent_footpaths, len(footpaths_per_from_to_stop_id)))

        log_start("adding footpaths within stops (if not defined)", log)
        nb_loops = 0
        for stop_id in stops_per_id:
            from_to_stop_id = (stop_id, stop_id)
            if from_to_stop_id not in footpaths_per_from_to_stop_id:
                footpaths_per_from_to_stop_id[from_to_stop_id] = Footpath(stop_id, stop_id, 0)  # best guess!!
                nb_loops += 1
        log_end(additional_message="# footpath loops added: {}, # footpaths total: {}".format(nb_loops, len(
            footpaths_per_from_to_stop_id)))

        if add_beeline_footpaths:
            create_beeline_footpaths(stops_per_id, footpaths_per_from_to_stop_id, beeline_distance, walking_speed)
        else:
            log.info("adding beeline footpaths is deactivated")

        if make_footpaths_transitive:
            make_transitive(footpaths_per_from_to_stop_id)
        else:
            log.info("making footpaths transitive is deactivated")

        log_start("parsing calendar.txt and calendar_dates.txt", log)
        service_available_at_date_per_service_id = get_service_available_at_date_per_service_id(zip_file, desired_date)
        log_end()

        log_start("parsing trips.txt", log)
        trip_available_at_date_per_trip_id, route_id_per_trip_id = \
            get_trip_available_at_date_per_trip_id(zip_file, service_available_at_date_per_service_id)
        if len(trip_available_at_date_per_trip_id):
            msg = "# trips available at {}: {}".format(desired_date, len(trip_available_at_date_per_trip_id))
        else:
            msg = "no trips available at {}. assure that the date is within the timetable period.".format(desired_date)
        log_end(additional_message=msg)

        log_start("parsing routes.txt and assigning route_type to trip_id", log)
        with zip_file.open("routes.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            route_id_index = header.index("route_id")  # required
            route_type_index = header.index("route_type")  # required
            route_type_per_route_id = {}
            for row in reader:
                route_type_per_route_id[row[route_id_index]] = int(row[route_type_index])
        route_type_per_trip_id = {trip_id: route_type_per_route_id[route_id_per_trip_id[trip_id]]
                                  for trip_id in route_id_per_trip_id}
        log_end()

        log_start("parsing stop_times.txt", log)
        with zip_file.open("stop_times.txt", "r") as gtfs_file:  # required
            reader = csv.reader(TextIOWrapper(gtfs_file, ENCODING))
            header = next(reader)
            trip_id_index = header.index("trip_id")  # required
            stop_id_index = header.index("stop_id")  # required
            arrival_time_index = get_index_with_default(header, "arrival_time")  # conditionally required
            departure_time_index = get_index_with_default(header, "departure_time")  # conditionally required

            def process_rows_of_trip(rows):
                """Builds the Trip of one block of stop_times rows and stores it in trips_per_id."""
                if not rows:
                    return
                trip_id = rows[0][trip_id_index]
                if not trip_available_at_date_per_trip_id[trip_id]:
                    return
                connections = []
                # every pair of consecutive rows forms one connection
                for from_row, to_row in zip(rows, rows[1:]):
                    con_dep = from_row[departure_time_index] if departure_time_index is not None else None
                    con_arr = to_row[arrival_time_index] if arrival_time_index is not None else None
                    if not (con_dep and con_arr):
                        return  # we do not want trips with missing times
                    connections.append(Connection(
                        trip_id,
                        from_row[stop_id_index],
                        to_row[stop_id_index],
                        hhmmss_to_sec(con_dep),
                        hhmmss_to_sec(con_arr)))

                try:
                    trip_type = TripType(route_type_per_trip_id[trip_id])
                except ValueError:
                    # route_type without a matching TripType member
                    trip_type = TripType.UNKNOWN
                trips_per_id[trip_id] = Trip(trip_id, connections, trip_type)

            last_trip_id = None
            row_list = []
            for row in reader:
                act_trip_id = row[trip_id_index]
                if last_trip_id == act_trip_id:
                    row_list.append(row)
                else:
                    # trip id changed: finish the collected block and start a new one
                    process_rows_of_trip(row_list)
                    last_trip_id = act_trip_id
                    row_list = [row]
            process_rows_of_trip(row_list)
        log_end(additional_message="# trips: {}".format(len(trips_per_id)))

    cs_data = ConnectionScanData(stops_per_id, footpaths_per_from_to_stop_id, trips_per_id)
    log_end(additional_message="{}".format(cs_data))
    return cs_data