Beispiel #1
0
class TimetableValidator(object):
    """Run a set of sanity checks on a GTFS feed and collect the findings.

    Every ``_validate_*`` method inspects one aspect of the feed and records
    what it finds in ``self.warnings_container``.
    """

    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS object
            A GTFS object. Anything that is not a ``GTFS`` instance is handed
            to the ``GTFS`` constructor.
        buffer_params: dict, optional
            Read by ``_validate_misplaced_stops`` through the keys ``'lat'``,
            ``'lon'`` and ``'buffer_distance'``. When falsy, the misplaced
            stop check is skipped.
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def validate_and_get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for can be found in gtfs_validator.ALL_WARNINGS.
        The warnings container is cleared first, so repeated calls do not
        accumulate warnings from earlier runs.

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        """Warn about stops lying further from the buffer center than the buffer distance.

        Does nothing when ``self.buffer_params`` is falsy.
        """
        if not self.buffer_params:
            return
        p = self.buffer_params
        center_lat = p['lat']
        center_lon = p['lon']
        # NOTE(review): the factor 1000 suggests 'buffer_distance' is given in
        # km and wgs84_distance returns meters -- confirm against the callers.
        buffer_distance = p[
            'buffer_distance'] * 1000 * 1.002  # some error margin for rounding
        for stop_row in self.gtfs.stops().itertuples():
            if buffer_distance < wgs84_distance(
                    center_lat, center_lon, stop_row.lat, stop_row.lon):
                self.warnings_container.add_warning(
                    WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)
                print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        """Warn about trips on which many stop_times rows share one arrival time.

        The query groups by (trip_I, arr_time); it does not check that the
        matching stops are adjacent in the stop sequence.
        """
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time, ))
        for row in rows:
            self.warnings_container.add_warning(
                WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME, row)

    def _validate_stop_spacings(self):
        """Warn about consecutive stop pairs that are too far apart in distance or time."""
        # Expose the Python distance function to SQL under the name find_distance.
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        # this query calculates distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            # row[4] is the "distance" column, row[5] the "traveltime" column.
            if row[4] > MAX_ALLOWED_DISTANCE_BETWEEN_CONSECUTIVE_STOPS:
                self.warnings_container.add_warning(WARNING_LONG_STOP_SPACING,
                                                    row)
            if row[5] > MAX_TIME_BETWEEN_STOPS:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS, row)

    def _validate_speeds_and_trip_times(self):
        """Warn about trips with an implausible average speed or a too long total travel time.

        Route types that have no entry in ``GTFS_TYPE_TO_MAX_SPEED`` are
        excluded from the speed check instead of aborting the validation
        with a ``KeyError``; the trip time check still applies to them.
        """
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)

        # this query returns the total distance and travel time for each trip calculated for each stop spacing separately
        rows = pandas.read_sql(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '  # sum used for getting total
            'sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime, '  # sum used for getting total
            'count(*) '  # number of joined consecutive-stop pairs of the trip
            'FROM '
            '   (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '   (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            '    trips, '
            '    routes '
            'WHERE q1.trip_I = q2.trip_I AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I GROUP BY q1.trip_I',
            self.gtfs.conn)

        for row in rows.itertuples():
            # max(..., 1) guards against division by zero for zero-duration trips.
            # NOTE(review): the factor 3.6 converts m/s to km/h, which presumes
            # the distance is in meters and the time in seconds -- confirm.
            avg_velocity_km_per_h = row.total_distance / max(
                row.total_traveltime, 1) * 3.6
            try:
                max_speed = GTFS_TYPE_TO_MAX_SPEED[row.type]
            except KeyError:
                # No speed limit known for this route type: skip the check.
                max_speed = None
            if max_speed is not None and avg_velocity_km_per_h > max_speed:
                self.warnings_container.add_warning(
                    WARNING_TRIP_UNREALISTIC_AVERAGE_SPEED + " (route_type=" +
                    str(row.type) + ")", row)
            if row.total_traveltime > MAX_TRIP_TIME:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRIP_TIME.format(MAX_TRIP_TIME=MAX_TRIP_TIME),
                    row, 1)

    def _validate_stop_sequence(self):
        """Check the seq values of stop_times within each trip.

        Rows are visited ordered by (trip_I, dep_time_ds, seq). For two
        successive rows of the same trip a warning is added when seq does
        not grow by exactly one, and another one when seq does not grow at all.
        """
        # This function checks if the seq values in stop_times are increasing with departure_time,
        # and that seq always increases by one.
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()
        old_trip_id = None
        old_seq = None
        for row in rows:
            new_trip_id = int(row[0])
            new_seq = int(row[2])
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(
                        WARNING_STOP_SEQUENCE_NOT_INCREMENTAL, row)
                if old_seq >= new_seq:
                    self.warnings_container.add_warning(
                        WARNING_STOP_SEQUENCE_ORDER_ERROR, row)
            # Remember the int-normalised values, so the next comparison is
            # made between values of the same type.
            old_trip_id = new_trip_id
            old_seq = new_seq
Beispiel #2
0
class TimetableValidator(object):
    """Run a set of sanity checks on a GTFS feed and collect the findings.

    Every ``_validate_*`` method inspects one aspect of the feed and records
    what it finds in ``self.warnings_container``.
    """

    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS object
            A GTFS object. Anything that is not a ``GTFS`` instance is handed
            to the ``GTFS`` constructor.
        buffer_params: dict, optional
            Read by ``_validate_misplaced_stops`` through the keys ``'lat'``,
            ``'lon'`` and ``'buffer_distance'``. When falsy, the misplaced
            stop check is skipped.
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for can be found in gtfs_validator.ALL_WARNINGS.
        The warnings container is cleared first, and its summary is printed
        once all checks have run.

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        self.warnings_container.print_summary()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        """Warn about stops lying further than twice the buffer distance from the buffer center.

        Does nothing when ``self.buffer_params`` is falsy.
        """
        if not self.buffer_params:
            return
        p = self.buffer_params
        center_lat = p['lat']
        center_lon = p['lon']
        # NOTE(review): the factor 1000 suggests 'buffer_distance' is given in
        # km and wgs84_distance returns meters -- confirm against the callers.
        distance = p['buffer_distance'] * 2 * 1000
        for stop_row in self.gtfs.stops().itertuples():
            if distance < wgs84_distance(center_lat, center_lon,
                                         stop_row.lat, stop_row.lon):
                self.warnings_container.add_warning(
                    stop_row, WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY)
                print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        """Warn about trips on which many stop_times rows share one arrival time.

        The query groups by (trip_I, arr_time); it does not check that the
        matching stops are adjacent in the stop sequence.
        """
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time, ))
        for row in rows:
            self.warnings_container.add_warning(
                row, WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME)

    def _validate_stop_spacings(self):
        """Warn about consecutive stop pairs that are too far apart in distance or time."""
        # Expose the Python distance function to SQL under the name find_distance.
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        max_stop_spacing = 20000  # meters
        max_time_between_stops = 1800  # seconds
        # this query calculates distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            # row[4] is the "distance" column, row[5] the "traveltime" column.
            if row[4] > max_stop_spacing:
                self.warnings_container.add_warning(row,
                                                    WARNING_LONG_STOP_SPACING)
            if row[5] > max_time_between_stops:
                self.warnings_container.add_warning(
                    row, WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS)

    def _validate_speeds_and_trip_times(self):
        """Warn about trips with an implausible average speed or a too long total travel time.

        Route types without an entry in the speed table below are excluded
        from the speed check instead of aborting the validation with a
        ``KeyError``; the trip time check still applies to them.
        """
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        gtfs_type_to_max_speed = {
            route_types.TRAM: 100,
            route_types.SUBWAY: 150,
            route_types.RAIL: 300,
            route_types.BUS: 100,
            route_types.FERRY: 80,
            route_types.CABLE_CAR: 50,
            route_types.GONDOLA: 50,
            route_types.FUNICULAR: 50,
            route_types.AIRCRAFT: 1000
        }
        max_trip_time = 7200  # seconds
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)

        # this query returns the total distance and travel time for each trip calculated for each stop spacing separately
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            ' q1.trip_I, '
            ' type, '
            ' sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '
            ' sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime '
            ' FROM '
            '(SELECT * FROM stop_times, '
            'stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, '
            'stops WHERE stop_times.stop_I = stops.stop_I) q2, trips, routes WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            '  AND trips.route_I = routes.route_I GROUP BY q1.trip_I'
        ).fetchall()

        for row in rows:
            # Columns: row[1] = type, row[2] = total_distance, row[3] = total_traveltime.
            # max(..., 1) guards against division by zero for zero-duration trips.
            # NOTE(review): the factor 3.6 converts m/s to km/h, which presumes
            # the distance is in meters and the time in seconds -- confirm.
            avg_velocity = row[2] / max(row[3], 1) * 3.6
            max_speed = gtfs_type_to_max_speed.get(row[1])
            if max_speed is not None and avg_velocity > max_speed:
                self.warnings_container.add_warning(
                    row, WARNING_UNREALISTIC_AVERAGE_SPEED)

            if row[3] > max_trip_time:
                self.warnings_container.add_warning(row,
                                                    WARNING_LONG_TRIP_TIME)

    def _validate_stop_sequence(self):
        """Check that seq grows by exactly one between successive stop_times rows of a trip.

        Rows are visited ordered by (trip_I, dep_time_ds, seq); a warning is
        added for every row whose seq is not the previous seq of the same
        trip plus one.
        """
        # this function checks if the stop sequence value is changing with +1 for each stop. This is not (yet) enforced
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()

        # (trip_I, seq) of the preceding row. It stays None until one row has
        # been seen, so the very first row is never compared against
        # uninitialised state, whatever its trip_I value is.
        previous = None
        for row in rows:
            current = (row[0], row[2])
            if previous is not None and previous[0] == current[0]:
                if previous[1] + 1 != current[1]:
                    self.warnings_container.add_warning(
                        row, WARNING_STOP_SEQUENCE_ERROR)
            previous = current