def add_zone_to_stop_table(zone_shape_path=DEMAND_ZONES):
    """
    Relate stop_Is to TAZ zones: spatially join the stops of every feed in
    FEED_LIST with the zone polygons and write the matching zone id back
    into each GTFS database's stops table.

    Parameters
    ----------
    zone_shape_path: str
        Path to the shapefile containing the TAZ zone polygons.
    """
    # BUG FIX: "espg" -> "epsg". The misspelled authority code is not a
    # valid CRS specification, so the intended WGS84 CRS was never applied.
    crs = {"init": "epsg:4326"}
    zones = gpd.read_file(zone_shape_path, crs=crs)
    for (name, gtfs_dict) in FEED_LIST:
        gtfs = GTFS(gtfs_dict["gtfs_dir"])
        df = gtfs.stops()
        # Build point geometries from the stop coordinates (lon/lat order,
        # i.e. x=lon, y=lat as shapely expects).
        geometry = [Point(xy) for xy in zip(df.lon, df.lat)]
        df = df.drop(["lon", "lat"], axis=1)

        gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
        zones_and_stops = gpd.sjoin(gdf, zones, how="inner", op='intersects')
        try:
            # Add the target columns; OperationalError means they already
            # exist from a previous run, which is fine.
            gtfs.execute_custom_query(
                """ALTER TABLE stops ADD COLUMN n_stops INT;""")
            gtfs.execute_custom_query(
                """ALTER TABLE stops ADD COLUMN zone_id INT;""")
        except OperationalError:
            pass
        # NOTE(review): the n_stops column is created above but never
        # populated here -- confirm whether a later step fills it in.
        subset = zones_and_stops[['WSP_ENN', 'stop_I']]
        tuples = [tuple(x) for x in subset.values]
        gtfs.conn.executemany(
            """UPDATE stops SET zone_id = ? WHERE stop_I = ?""", tuples)
        gtfs.conn.commit()
Beispiel #2
0
def get_stop_I_by_stop_id(stop_id):
    """
    Look up the internal stop_I for a given GTFS stop_id.

    Parameters
    ----------
    stop_id: str
        The GTFS stop_id to look up.

    Returns
    -------
    stop_I: int
        The internal integer id of the stop.
    """
    from gtfspy.gtfs import GTFS
    g = GTFS(IMPORTED_DATABASE_PATH)
    # BUG FIX: the query used to be built by string concatenation, which is
    # vulnerable to SQL injection and breaks on stop_ids containing quotes.
    # Use a parameterized query on the underlying connection instead.
    # (Also dropped a leftover debug print of stop_id.)
    row = g.conn.execute(
        "SELECT stop_I FROM stops WHERE stop_id=?;", (str(stop_id),)
    ).fetchone()
    return row[0]
 def _correct_arrival_times_for_raw_db(self):
     """
     Some feeds have had incorrect arrival and/or departure times, where
     e.g. p0.dep_time_ds = p1.arr_time_ds =/= p1.dep_time_ds = p2.arr_time_ds etc.
     If this has been done, there are long waits at stops, while traveling between stops takes zero time.
     """
     g = GTFS(self.raw_db_path)
     # Self-join stop_times on consecutive seq values of the same trip
     # (st1.seq + 1 = st2.seq).  Wherever the earlier stop's departure is
     # not before the later stop's arrival AND both rows have a non-zero
     # dwell (dep_time_ds - arr_time_ds), the affected rows get
     # arr_time_ds overwritten with dep_time_ds, collapsing the spurious
     # dwell time at those stops.
     g.execute_custom_query("""UPDATE stop_times SET arr_time_ds = dep_time_ds
                                 WHERE EXISTS (
                                     SELECT * FROM
                                         (SELECT st2.trip_I AS trip_I, st2.seq AS seq FROM
                                             (SELECT stop_I, seq, trip_I, arr_time_ds,  dep_time_ds,
                                              dep_time_ds - arr_time_ds AS wait_duration FROM stop_times) st1,
                                             (SELECT stop_I, seq, trip_I, arr_time_ds,  dep_time_ds,
                                              dep_time_ds - arr_time_ds AS wait_duration FROM stop_times) st2
                                                 WHERE st1.trip_I = st2.trip_I AND st1.seq+1 = st2.seq
                                                     AND st1.dep_time_ds >= st2.arr_time_ds AND NOT st1.wait_duration = 0
                                                     AND NOT st2.wait_duration = 0)
                                         stop_pairs
                                     WHERE stop_times.trip_I=stop_pairs.trip_I AND stop_times.seq=stop_pairs.seq)""")
     # Persist the corrections.
     g.conn.commit()
Beispiel #4
0
def get_swimming_hall_stop_Is():
    """
    Return the stop_I values of all stops whose stop_id begins with "SWIMMI".
    """
    from gtfspy.gtfs import GTFS
    gtfs = GTFS(IMPORTED_DATABASE_PATH)
    query = 'SELECT stop_I FROM stops WHERE SUBSTR(stop_id, 0, 7)="SWIMMI";'
    result_rows = gtfs.execute_custom_query(query).fetchall()
    swimming_hall_stop_Is = []
    for row in result_rows:
        swimming_hall_stop_Is.append(row[0])
    return swimming_hall_stop_Is
Beispiel #5
0
class TimetableValidator(object):
    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS object
            A GTFS object
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def validate_and_get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for, can be found in the gtfs_validator.ALL_WARNINGS

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        if self.buffer_params:
            p = self.buffer_params
            center_lat = p['lat']
            center_lon = p['lon']
            buffer_distance = p[
                'buffer_distance'] * 1000 * 1.002  # some error margin for rounding
            for stop_row in self.gtfs.stops().itertuples():
                if buffer_distance < wgs84_distance(
                        center_lat, center_lon, stop_row.lat, stop_row.lon):
                    self.warnings_container.add_warning(
                        WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)
                    print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time, ))
        for row in rows:
            self.warnings_container.add_warning(
                WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME, row)

    def _validate_stop_spacings(self):
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        # this query calculates distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            if row[4] > MAX_ALLOWED_DISTANCE_BETWEEN_CONSECUTIVE_STOPS:
                self.warnings_container.add_warning(WARNING_LONG_STOP_SPACING,
                                                    row)
            if row[5] > MAX_TIME_BETWEEN_STOPS:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS, row)

    def _validate_speeds_and_trip_times(self):
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)

        # this query returns the total distance and travel time for each trip calculated for each stop spacing separately
        rows = pandas.read_sql(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '  # sum used for getting total
            'sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime, '  # sum used for getting total
            'count(*)'  # for getting the total number of stops
            'FROM '
            '   (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '   (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            '    trips, '
            '    routes '
            'WHERE q1.trip_I = q2.trip_I AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I GROUP BY q1.trip_I',
            self.gtfs.conn)

        for row in rows.itertuples():
            avg_velocity_km_per_h = row.total_distance / max(
                row.total_traveltime, 1) * 3.6
            if avg_velocity_km_per_h > GTFS_TYPE_TO_MAX_SPEED[row.type]:
                self.warnings_container.add_warning(
                    WARNING_TRIP_UNREALISTIC_AVERAGE_SPEED + " (route_type=" +
                    str(row.type) + ")", row)
            if row.total_traveltime > MAX_TRIP_TIME:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRIP_TIME.format(MAX_TRIP_TIME=MAX_TRIP_TIME),
                    row, 1)

    def _validate_stop_sequence(self):
        # This function checks if the seq values in stop_times are increasing with departure_time,
        # and that seq always increases by one.
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()
        old_trip_id = None
        old_seq = None
        for row in rows:
            new_trip_id = int(row[0])
            new_seq = int(row[2])
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(
                        WARNING_STOP_SEQUENCE_NOT_INCREMENTAL, row)
                if old_seq >= new_seq:
                    self.warnings_container.add_warning(
                        WARNING_STOP_SEQUENCE_ORDER_ERROR, row)
            old_trip_id = row[0]
            old_seq = row[2]
Beispiel #6
0
class TimetableValidator(object):
    """
    Checks a GTFS feed for timetable-level data-quality issues (shared stop
    times, unrealistic speeds, long stop spacings, broken stop sequences,
    misplaced stops) and collects the findings into a WarningsContainer.
    """

    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS object
            A GTFS object
        buffer_params: dict, optional
            Keys 'lat', 'lon' and 'buffer_distance' describing the spatial
            filter used at import time; buffer_distance is multiplied by
            1000 below, so presumably given in kilometers -- TODO confirm.
        """
        # Accept either a ready GTFS object or a path to one.
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for, can be found in the gtfs_validator.ALL_WARNINGS

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        self.warnings_container.print_summary()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        """Warn about stops located outside twice the import buffer distance."""
        if self.buffer_params:
            p = self.buffer_params
            center_lat = p['lat']
            center_lon = p['lon']
            distance = p['buffer_distance'] * 2 * 1000
            count = 0  # NOTE(review): count is never used below
            for stop_row in self.gtfs.stops().itertuples():
                if distance < wgs84_distance(center_lat, center_lon,
                                             stop_row.lat, stop_row.lon):
                    self.warnings_container.add_warning(
                        stop_row, WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY)
                    print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        """Warn about trips where five or more stops share one arrival time."""
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time, ))
        for row in rows:
            self.warnings_container.add_warning(
                row, WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME)

    def _validate_stop_spacings(self):
        """Warn about consecutive stop pairs with overlong spacing/travel time."""
        # Register the Python great-circle distance helper as an SQL function.
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        max_stop_spacing = 20000  # meters
        max_time_between_stops = 1800  # seconds
        # this query calculates distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            # row[4] = distance in meters, row[5] = travel time in seconds.
            if row[4] > max_stop_spacing:
                self.warnings_container.add_warning(row,
                                                    WARNING_LONG_STOP_SPACING)
            if row[5] > max_time_between_stops:
                self.warnings_container.add_warning(
                    row, WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS)

    def _validate_speeds_and_trip_times(self):
        """Warn about trips with unrealistic average speed or overlong duration."""
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        gtfs_type_to_max_speed = {
            route_types.TRAM: 100,
            route_types.SUBWAY: 150,
            route_types.RAIL: 300,
            route_types.BUS: 100,
            route_types.FERRY: 80,
            route_types.CABLE_CAR: 50,
            route_types.GONDOLA: 50,
            route_types.FUNICULAR: 50,
            route_types.AIRCRAFT: 1000
        }
        max_trip_time = 7200  # seconds
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)

        # this query returns the total distance and travel time for each trip calculated for each stop spacing separately
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            ' q1.trip_I, '
            ' type, '
            ' sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '
            ' sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime '
            ' FROM '
            '(SELECT * FROM stop_times, '
            'stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, '
            'stops WHERE stop_times.stop_I = stops.stop_I) q2, trips, routes WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            '  AND trips.route_I = routes.route_I GROUP BY q1.trip_I'
        ).fetchall()

        for row in rows:
            # meters / seconds * 3.6 -> km/h; max(..., 1) avoids division by
            # zero for zero-duration trips.
            avg_velocity = row[2] / max(row[3], 1) * 3.6
            if avg_velocity > gtfs_type_to_max_speed[row[1]]:
                self.warnings_container.add_warning(
                    row, WARNING_UNREALISTIC_AVERAGE_SPEED)

            if row[3] > max_trip_time:
                self.warnings_container.add_warning(row,
                                                    WARNING_LONG_TRIP_TIME)

    def _validate_stop_sequence(self):
        # this function checks if the stop sequence value is changing with +1 for each stop. This is not (yet) enforced
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()

        old_trip_id = None
        for row in rows:
            new_trip_id = row[0]
            new_seq = row[2]
            # Only compare rows of the same trip; the first row of each trip
            # (and the very first row overall) has nothing to compare against.
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(
                        row, WARNING_STOP_SEQUENCE_ERROR)
            old_trip_id = row[0]
            old_seq = row[2]
Beispiel #7
0
class ImportValidator(object):
    """
    Compares an imported GTFS database against its source .txt files:
    per-table row counts, null values in required fields, and dangling
    (unreferenced) rows.  Findings go into a WarningsContainer.
    """

    def __init__(self, gtfssource, gtfs):
        """
        Parameters
        ----------
        gtfssource: list of strings, a single string, or a dict
            Paths to the GTFS source data (or a dict containing it directly).
        gtfs: GTFS, or path to a GTFS object
            A GTFS object
        """
        # Cache of per-source frequencies dataframes, filled by
        # _frequency_generated_trips and reused by
        # _frequency_generated_stop_times.
        self.df_freq_dict = {}
        if isinstance(gtfssource, string_types + (dict, )):
            self.gtfs_sources = [gtfssource]
        else:
            assert isinstance(gtfssource, list)
            self.gtfs_sources = gtfssource
        assert len(
            self.gtfs_sources
        ) > 0, "There needs to be some source files for validating an import"

        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs

        self.location = self.gtfs.get_location_name()
        self.warnings_container = WarningsContainer()

    def get_warnings(self):
        """
        Run all import validations, print a summary, and return the warnings.

        Returns
        -------
        warnings_container: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_table_counts()
        self._validate_no_nulls()
        self._validate_danglers()
        self.warnings_container.print_summary()
        return self.warnings_container

    def _validate_table_counts(self):
        """
        Imports source .txt files, checks row counts and then compares the rowcounts with the gtfsobject
        :return:
        """
        for table_name_txt, db_table_name, row_warning in zip(
                SOURCE_TABLE_NAMES, DB_TABLE_NAMES, ROW_WARNINGS):
            source_row_count = 0

            for gtfs_source in self.gtfs_sources:
                frequencies_in_source = source_table_txt_to_pandas(
                    gtfs_source, 'frequencies.txt')
                try:
                    # trips and stop_times gain extra rows generated from
                    # frequencies.txt during import, so count those too.
                    if table_name_txt == 'trips' and not frequencies_in_source.empty:
                        source_row_count += self._frequency_generated_trips(
                            gtfs_source, table_name_txt)

                    elif table_name_txt == 'stop_times' and not frequencies_in_source.empty:
                        source_row_count += self._frequency_generated_stop_times(
                            gtfs_source, table_name_txt)
                    else:
                        df = source_table_txt_to_pandas(
                            gtfs_source, table_name_txt)

                        source_row_count += len(df.index)
                except (IOError) as e:
                    # A missing source file is reported but not fatal.
                    print(e)
                    pass

            # Result from GTFSobj:
            database_row_count = self.gtfs.get_row_count(db_table_name)
            if source_row_count == database_row_count:
                print("Row counts match for " + table_name_txt +
                      " between the source and database (" +
                      str(database_row_count) + ")")

            else:
                difference = database_row_count - source_row_count
                print('Row counts do not match for ' + str(table_name_txt) +
                      ': (source=' + str(source_row_count) + ', database=' +
                      str(database_row_count) + ")")
                if table_name_txt == "calendar" and difference > 0:
                    # Extra calendar rows may be dummy entries created by the
                    # calendar_dates loader (single-day ranges with all
                    # weekday flags zero); check if they explain the gap.
                    query = "SELECT count(*) FROM (SELECT * FROM calendar ORDER BY service_I DESC LIMIT " \
                            + str(int(difference)) + \
                            ") WHERE start_date=end_date AND m=0 AND t=0 AND w=0 AND th=0 AND f=0 AND s=0 AND su=0"
                    number_of_entries_added_by_calendar_dates_loader = self.gtfs.execute_custom_query(
                        query).fetchone()[0]
                    if number_of_entries_added_by_calendar_dates_loader == difference:
                        print(
                            "    But don't worry, the extra entries seem to just dummy entries due to calendar_dates"
                        )
                    else:
                        print("    Reason for this is unknown.")
                        self.warnings_container.add_warning(
                            self.location, row_warning, difference)
                else:
                    self.warnings_container.add_warning(
                        self.location, row_warning, difference)

    def _validate_no_nulls(self):
        """
        Loads the tables from the gtfs object and counts the number of rows that have null values in
        fields that should not be null. Stores the number of null rows in warnings_container
        """
        for table, null_warning in zip(DB_TABLE_NAMES, NULL_WARNINGS):
            # TODO: make this validation source by source
            df = self.gtfs.get_table(table)
            # Drop the columns where nulls are acceptable; whatever nulls
            # remain are in must-have fields.
            df.drop(FIELDS_WHERE_NULL_OK[table], inplace=True, axis=1)
            # print(df.to_string())
            len_table = len(df.index)
            df.dropna(inplace=True, axis=0)
            len_non_null = len(df.index)
            nullrows = len_table - len_non_null
            if nullrows > 0:
                # print('Warning: Null values detected in table ' + table)
                self.warnings_container.add_warning(self.location,
                                                    null_warning,
                                                    value=nullrows)

    def _validate_danglers(self):
        """
        Checks for rows that are not referenced in the the tables that should be linked

        stops <> stop_times using stop_I
        stop_times <> trips <> days, using trip_I
        trips <> routes, using route_I
        :return:
        """
        for query, warning in zip(DANGLER_QUERIES, DANGLER_WARNINGS):
            dangler_count = self.gtfs.execute_custom_query(query).fetchone()[0]
            if dangler_count > 0:
                print(str(dangler_count) + " " + warning)
                self.warnings_container.add_warning(self.location,
                                                    warning,
                                                    value=dangler_count)

    def _frequency_generated_trips(self, source, txt):
        """
        This function calculates the equivalent rowcounts for trips when
        taking into account the generated rows in the gtfs object
        :param source: path to the source file
        :param txt: txt file in question
        :return: sum of all trips
        """
        df_freq = source_table_txt_to_pandas(source, u'frequencies.txt')
        df_trips = source_table_txt_to_pandas(source, txt)
        # Each frequencies.txt row expands into one trip per headway
        # interval between start_time and end_time.
        df_freq['n_trips'] = df_freq.apply(lambda row: len(
            range(str_time_to_day_seconds(row['start_time']),
                  str_time_to_day_seconds(row['end_time']), row['headway_secs']
                  )),
                                           axis=1)
        self.df_freq_dict[source] = df_freq
        df_trips_freq = pd.merge(df_freq, df_trips, how='outer', on='trip_id')

        # Trips without a frequencies entry count once (fillna(1)).
        return int(df_trips_freq['n_trips'].fillna(1).sum(axis=0))

    def _frequency_generated_stop_times(self, source, txt):
        """
        same as above except for stop times table
        :param source:
        :param txt:
        :return:
        """
        df_stop_times = source_table_txt_to_pandas(source, txt)
        # Reuse the frequencies dataframe cached by _frequency_generated_trips.
        df_freq = self.df_freq_dict[source]
        df_stop_freq = pd.merge(df_freq,
                                df_stop_times,
                                how='outer',
                                on='trip_id')

        return int(df_stop_freq['n_trips'].fillna(1).sum(axis=0))
Beispiel #8
0
class ImportValidator(object):
    """
    Validates that an imported GTFS database matches its source files:
    per-table row counts, null values in required/recommended columns,
    and dangling (unreferenced) rows.  Findings are collected into a
    WarningsContainer.
    """

    def __init__(self, gtfssource, gtfs, verbose=True):
        """
        Parameters
        ----------
        gtfssource: list, string, dict
            list of paths to the source data, a single such path, or a
            dictionary containing the gtfs data directly
        gtfs: gtfspy.gtfs.GTFS, or path to a relevant .sqlite GTFS database
        verbose: bool
            Whether or not to print warnings on-the-fly.
        """
        if isinstance(gtfssource, string_types + (dict, )):
            self.gtfs_sources = [gtfssource]
        else:
            assert isinstance(gtfssource, list)
            self.gtfs_sources = gtfssource
        assert len(
            self.gtfs_sources
        ) > 0, "There needs to be some source files for validating an import"

        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs

        self.location = self.gtfs.get_location_name()
        self.warnings_container = WarningsContainer()
        self.verbose = verbose

    def validate_and_get_warnings(self):
        """
        Run all import validations and return the collected warnings.

        Returns
        -------
        warnings_container: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_table_row_counts()
        self._validate_no_null_values()
        self._validate_danglers()
        return self.warnings_container

    def _validate_table_row_counts(self):
        """
        Import the source .txt files, count their rows, and compare against
        the row counts of the corresponding database tables.
        """
        for db_table_name in DB_TABLE_NAME_TO_SOURCE_FILE.keys():
            table_name_source_file = DB_TABLE_NAME_TO_SOURCE_FILE[
                db_table_name]
            row_warning_str = DB_TABLE_NAME_TO_ROWS_MISSING_WARNING[
                db_table_name]

            # Row count in GTFS object:
            database_row_count = self.gtfs.get_row_count(db_table_name)

            # Row counts in source files:
            source_row_count = 0
            for gtfs_source in self.gtfs_sources:
                frequencies_in_source = source_csv_to_pandas(
                    gtfs_source, 'frequencies.txt')
                try:
                    # trips and stop_times gain extra rows generated from
                    # frequencies.txt during import, so count those too.
                    if table_name_source_file == 'trips' and not frequencies_in_source.empty:
                        source_row_count += self._frequency_generated_trips_rows(
                            gtfs_source)
                    elif table_name_source_file == 'stop_times' and not frequencies_in_source.empty:
                        source_row_count += self._compute_number_of_frequency_generated_stop_times(
                            gtfs_source)
                    else:
                        df = source_csv_to_pandas(gtfs_source,
                                                  table_name_source_file)
                        source_row_count += len(df.index)
                except IOError as e:
                    # A missing optional source file is tolerated; anything
                    # else is re-raised.
                    if hasattr(e, "filename") and db_table_name in e.filename:
                        pass
                    else:
                        raise e

            if source_row_count == database_row_count:
                # BUG FIX: the match test used to be "== ... and self.verbose",
                # so with verbose=False even matching tables fell through to
                # the mismatch branch and produced spurious warnings with
                # difference 0.  The verbosity check now only guards printing.
                if self.verbose:
                    print("Row counts match for " + table_name_source_file +
                          " between the source and database (" +
                          str(database_row_count) + ")")
            else:
                difference = database_row_count - source_row_count
                # BUG FIX: this message was previously built as a bare
                # expression statement and never printed.
                if self.verbose:
                    print('Row counts do not match for ' +
                          str(table_name_source_file) + ': (source=' +
                          str(source_row_count) + ', database=' +
                          str(database_row_count) + ")")
                if table_name_source_file == "calendar" and difference > 0:
                    # Extra calendar rows may be dummy entries created by the
                    # calendar_dates loader (single-day ranges with all
                    # weekday flags zero); check if they explain the gap.
                    query = "SELECT count(*) FROM (SELECT * FROM calendar ORDER BY service_I DESC LIMIT " \
                            + str(int(difference)) + \
                            ") WHERE start_date=end_date AND m=0 AND t=0 AND w=0 AND th=0 AND f=0 AND s=0 AND su=0"
                    number_of_entries_added_by_calendar_dates_loader = self.gtfs.execute_custom_query(
                        query).fetchone()[0]
                    if number_of_entries_added_by_calendar_dates_loader == difference:
                        # BUG FIX: same verbose entanglement as above -- with
                        # verbose=False an explained difference used to be
                        # reported as an unexplained warning.
                        if self.verbose:
                            print(
                                "    But don't worry, the extra entries seem to just dummy entries due to calendar_dates"
                            )
                    else:
                        if self.verbose:
                            print("    Reason for this is unknown.")
                        self.warnings_container.add_warning(
                            row_warning_str, self.location, difference)
                else:
                    self.warnings_container.add_warning(
                        row_warning_str, self.location, difference)

    def _validate_no_null_values(self):
        """
        Load each table from the gtfs object and count rows that have null
        values in fields that must not (or should not) be null.  The counts
        are stored in warnings_container.
        """
        for table in DB_TABLE_NAMES:
            null_not_ok_warning = "Null values in must-have columns in table {table}".format(
                table=table)
            null_warn_warning = "Null values in good-to-have columns in table {table}".format(
                table=table)
            null_not_ok_fields = DB_TABLE_NAME_TO_FIELDS_WHERE_NULL_NOT_OK[
                table]
            null_warn_fields = DB_TABLE_NAME_TO_FIELDS_WHERE_NULL_OK_BUT_WARN[
                table]

            # CW, TODO: make this validation source by source
            df = self.gtfs.get_table(table)

            for warning, fields in zip(
                    [null_not_ok_warning, null_warn_warning],
                    [null_not_ok_fields, null_warn_fields]):
                null_unwanted_df = df[fields]
                # Keyword axis: positional `.any(1)` is deprecated in
                # modern pandas.
                rows_having_null = null_unwanted_df.isnull().any(axis=1)
                if sum(rows_having_null) > 0:
                    rows_having_unwanted_null = df[rows_having_null.values]
                    self.warnings_container.add_warning(
                        warning, rows_having_unwanted_null,
                        len(rows_having_unwanted_null))

    def _validate_danglers(self):
        """
        Check for rows that are not referenced in the tables that should be
        linked:

        stops <> stop_times using stop_I
        stop_times <> trips <> days, using trip_I
        trips <> routes, using route_I
        """
        for query, warning in zip(DANGLER_QUERIES, DANGLER_WARNINGS):
            dangler_count = self.gtfs.execute_custom_query(query).fetchone()[0]
            if dangler_count > 0:
                if self.verbose:
                    print(str(dangler_count) + " " + warning)
                self.warnings_container.add_warning(warning,
                                                    self.location,
                                                    count=dangler_count)

    def _frequency_generated_trips_rows(self,
                                        gtfs_soure_path,
                                        return_df_freq=False):
        """
        Calculate the equivalent trips row count when rows generated from
        frequencies.txt at import time are taken into account.

        Parameters
        ----------
        gtfs_soure_path: path to the source directory/file
        return_df_freq: bool
            If True, return the merged frequencies/trips dataframe instead
            of the row count.

        Returns
        -------
        int or pandas.DataFrame
        """
        df_freq = source_csv_to_pandas(gtfs_soure_path, 'frequencies')
        df_trips = source_csv_to_pandas(gtfs_soure_path, "trips")
        # Each frequencies row expands into one trip per headway interval
        # between start_time and end_time.
        df_freq['n_trips'] = df_freq.apply(lambda row: len(
            range(str_time_to_day_seconds(row['start_time']),
                  str_time_to_day_seconds(row['end_time']), row['headway_secs']
                  )),
                                           axis=1)
        df_trips_freq = pd.merge(df_freq, df_trips, how='outer', on='trip_id')
        # Trips without a frequencies entry count once (fillna(1)).
        n_freq_generated_trips = int(
            df_trips_freq['n_trips'].fillna(1).sum(axis=0))
        if return_df_freq:
            return df_trips_freq
        else:
            return n_freq_generated_trips

    def _compute_number_of_frequency_generated_stop_times(
            self, gtfs_source_path):
        """
        Same as _frequency_generated_trips_rows but for the stop_times
        table: every frequency-generated trip repeats all of its stop_times
        rows.

        Parameters
        ----------
        gtfs_source_path: path to the source directory/file

        Returns
        -------
        int
        """
        df_freq = self._frequency_generated_trips_rows(gtfs_source_path,
                                                       return_df_freq=True)
        df_stop_times = source_csv_to_pandas(gtfs_source_path, "stop_times")
        df_stop_freq = pd.merge(df_freq,
                                df_stop_times,
                                how='outer',
                                on='trip_id')
        return int(df_stop_freq['n_trips'].fillna(1).sum(axis=0))