def add_zone_to_stop_table(zone_shape_path=DEMAND_ZONES): """ Creates table which relates stop_Is with TAZ zones and counts the number of stops :return: """ crs = {"init": "espg:4326"} zones = gpd.read_file(zone_shape_path, crs=crs) for (name, gtfs_dict) in FEED_LIST: gtfs = GTFS(gtfs_dict["gtfs_dir"]) df = gtfs.stops() geometry = [Point(xy) for xy in zip(df.lon, df.lat)] df = df.drop(["lon", "lat"], axis=1) gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry) zones_and_stops = gpd.sjoin(gdf, zones, how="inner", op='intersects') try: gtfs.execute_custom_query( """ALTER TABLE stops ADD COLUMN n_stops INT;""") gtfs.execute_custom_query( """ALTER TABLE stops ADD COLUMN zone_id INT;""") except OperationalError: pass subset = zones_and_stops[['WSP_ENN', 'stop_I']] tuples = [tuple(x) for x in subset.values] gtfs.conn.executemany( """UPDATE stops SET zone_id = ? WHERE stop_I = ?""", tuples) gtfs.conn.commit()
def get_stop_I_by_stop_id(stop_id): from gtfspy.gtfs import GTFS g = GTFS(IMPORTED_DATABASE_PATH) query = "SELECT stop_I FROM stops WHERE stop_id='" + str(stop_id) + "';" print(stop_id) stop_I = g.execute_custom_query(query).fetchone()[0] return stop_I
def _correct_arrival_times_for_raw_db(self): """ Some feeds have had incorrect arrival and/or departure times, where e.g. p0.dep_time_ds = p1.arr_time_ds =/= p1.dep_time_ds = p2.arr_time_ds etc. If this has been done, there are long waits at stops, while traveling between stops takes zero time. """ g = GTFS(self.raw_db_path) g.execute_custom_query("""UPDATE stop_times SET arr_time_ds = dep_time_ds WHERE EXISTS ( SELECT * FROM (SELECT st2.trip_I AS trip_I, st2.seq AS seq FROM (SELECT stop_I, seq, trip_I, arr_time_ds, dep_time_ds, dep_time_ds - arr_time_ds AS wait_duration FROM stop_times) st1, (SELECT stop_I, seq, trip_I, arr_time_ds, dep_time_ds, dep_time_ds - arr_time_ds AS wait_duration FROM stop_times) st2 WHERE st1.trip_I = st2.trip_I AND st1.seq+1 = st2.seq AND st1.dep_time_ds >= st2.arr_time_ds AND NOT st1.wait_duration = 0 AND NOT st2.wait_duration = 0) stop_pairs WHERE stop_times.trip_I=stop_pairs.trip_I AND stop_times.seq=stop_pairs.seq)""") g.conn.commit()
def get_swimming_hall_stop_Is(): from gtfspy.gtfs import GTFS g = GTFS(IMPORTED_DATABASE_PATH) query = 'SELECT stop_I FROM stops WHERE SUBSTR(stop_id, 0, 7)="SWIMMI";' stop_Is = [el[0] for el in g.execute_custom_query(query).fetchall()] return stop_Is
class TimetableValidator(object): def __init__(self, gtfs, buffer_params=None): """ Parameters ---------- gtfs: GTFS, or path to a GTFS object A GTFS object """ if not isinstance(gtfs, GTFS): self.gtfs = GTFS(gtfs) else: self.gtfs = gtfs self.buffer_params = buffer_params self.warnings_container = WarningsContainer() def validate_and_get_warnings(self): """ Validates/checks a given GTFS feed with respect to a number of different issues. The set of warnings that are checked for, can be found in the gtfs_validator.ALL_WARNINGS Returns ------- warnings: WarningsContainer """ self.warnings_container.clear() self._validate_stops_with_same_stop_time() self._validate_speeds_and_trip_times() self._validate_stop_spacings() self._validate_stop_sequence() self._validate_misplaced_stops() return self.warnings_container def _validate_misplaced_stops(self): if self.buffer_params: p = self.buffer_params center_lat = p['lat'] center_lon = p['lon'] buffer_distance = p[ 'buffer_distance'] * 1000 * 1.002 # some error margin for rounding for stop_row in self.gtfs.stops().itertuples(): if buffer_distance < wgs84_distance( center_lat, center_lon, stop_row.lat, stop_row.lon): self.warnings_container.add_warning( WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row) print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row) def _validate_stops_with_same_stop_time(self): n_stops_with_same_time = 5 # this query returns the trips where there are N or more stops with the same stop time rows = self.gtfs.get_cursor().execute( 'SELECT ' 'trip_I, ' 'arr_time, ' 'N ' 'FROM ' '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 ' 'WHERE N >= ?', (n_stops_with_same_time, )) for row in rows: self.warnings_container.add_warning( WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME, row) def _validate_stop_spacings(self): self.gtfs.conn.create_function("find_distance", 4, wgs84_distance) # this query calculates distance and travel time between consecutive stops rows = self.gtfs.execute_custom_query( 'SELECT ' 'q1.trip_I, ' 'type, ' 'q1.stop_I as stop_1, ' 'q2.stop_I as stop_2, ' 'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, ' 'q2.arr_time_ds - q1.arr_time_ds as traveltime ' 'FROM ' '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, ' '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, ' 'trips, ' 'routes ' 'WHERE q1.trip_I = q2.trip_I ' 'AND q1.seq + 1 = q2.seq ' 'AND q1.trip_I = trips.trip_I ' 'AND trips.route_I = routes.route_I ').fetchall() for row in rows: if row[4] > MAX_ALLOWED_DISTANCE_BETWEEN_CONSECUTIVE_STOPS: self.warnings_container.add_warning(WARNING_LONG_STOP_SPACING, row) if row[5] > MAX_TIME_BETWEEN_STOPS: self.warnings_container.add_warning( WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS, row) def _validate_speeds_and_trip_times(self): # These are the mode - feasible speed combinations used here: # https://support.google.com/transitpartners/answer/1095482?hl=en self.gtfs.conn.create_function("find_distance", 4, wgs84_distance) # this query returns the total distance and travel time for each trip calculated for each stop spacing separately rows = pandas.read_sql( 'SELECT ' 'q1.trip_I, ' 'type, ' 'sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, ' # sum used for getting total 'sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime, ' # sum used for getting total 'count(*)' # for getting the total number of stops 'FROM ' ' (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, ' ' (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, ' ' trips, ' ' routes ' 'WHERE q1.trip_I = q2.trip_I AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I ' 'AND trips.route_I = routes.route_I GROUP BY q1.trip_I', self.gtfs.conn) for row in rows.itertuples(): avg_velocity_km_per_h = row.total_distance / max( row.total_traveltime, 1) * 3.6 if avg_velocity_km_per_h > GTFS_TYPE_TO_MAX_SPEED[row.type]: self.warnings_container.add_warning( WARNING_TRIP_UNREALISTIC_AVERAGE_SPEED + " (route_type=" + str(row.type) + ")", row) if row.total_traveltime > MAX_TRIP_TIME: self.warnings_container.add_warning( WARNING_LONG_TRIP_TIME.format(MAX_TRIP_TIME=MAX_TRIP_TIME), row, 1) def _validate_stop_sequence(self): # This function checks if the seq values in stop_times are increasing with departure_time, # and that seq always increases by one. rows = self.gtfs.execute_custom_query( 'SELECT trip_I, dep_time_ds, seq ' 'FROM stop_times ' 'ORDER BY trip_I, dep_time_ds, seq').fetchall() old_trip_id = None old_seq = None for row in rows: new_trip_id = int(row[0]) new_seq = int(row[2]) if old_trip_id == new_trip_id: if old_seq + 1 != new_seq: self.warnings_container.add_warning( WARNING_STOP_SEQUENCE_NOT_INCREMENTAL, row) if old_seq >= new_seq: self.warnings_container.add_warning( WARNING_STOP_SEQUENCE_ORDER_ERROR, row) old_trip_id = row[0] old_seq = row[2]
class TimetableValidator(object): def __init__(self, gtfs, buffer_params=None): """ Parameters ---------- gtfs: GTFS, or path to a GTFS object A GTFS object """ if not isinstance(gtfs, GTFS): self.gtfs = GTFS(gtfs) else: self.gtfs = gtfs self.buffer_params = buffer_params self.warnings_container = WarningsContainer() def get_warnings(self): """ Validates/checks a given GTFS feed with respect to a number of different issues. The set of warnings that are checked for, can be found in the gtfs_validator.ALL_WARNINGS Returns ------- warnings: WarningsContainer """ self.warnings_container.clear() self._validate_stops_with_same_stop_time() self._validate_speeds_and_trip_times() self._validate_stop_spacings() self._validate_stop_sequence() self._validate_misplaced_stops() self.warnings_container.print_summary() return self.warnings_container def _validate_misplaced_stops(self): if self.buffer_params: p = self.buffer_params center_lat = p['lat'] center_lon = p['lon'] distance = p['buffer_distance'] * 2 * 1000 count = 0 for stop_row in self.gtfs.stops().itertuples(): if distance < wgs84_distance(center_lat, center_lon, stop_row.lat, stop_row.lon): self.warnings_container.add_warning( stop_row, WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY) print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row) def _validate_stops_with_same_stop_time(self): n_stops_with_same_time = 5 # this query returns the trips where there are N or more stops with the same stop time rows = self.gtfs.get_cursor().execute( 'SELECT ' 'trip_I, ' 'arr_time, ' 'N ' 'FROM ' '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 ' 'WHERE N >= ?', (n_stops_with_same_time, )) for row in rows: self.warnings_container.add_warning( row, WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME) def _validate_stop_spacings(self): self.gtfs.conn.create_function("find_distance", 4, wgs84_distance) max_stop_spacing = 20000 # meters max_time_between_stops = 1800 # seconds # this query calculates distance and travel time between consecutive stops rows = self.gtfs.execute_custom_query( 'SELECT ' 'q1.trip_I, ' 'type, ' 'q1.stop_I as stop_1, ' 'q2.stop_I as stop_2, ' 'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, ' 'q2.arr_time_ds - q1.arr_time_ds as traveltime ' 'FROM ' '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, ' '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, ' 'trips, ' 'routes ' 'WHERE q1.trip_I = q2.trip_I ' 'AND q1.seq + 1 = q2.seq ' 'AND q1.trip_I = trips.trip_I ' 'AND trips.route_I = routes.route_I ').fetchall() for row in rows: if row[4] > max_stop_spacing: self.warnings_container.add_warning(row, WARNING_LONG_STOP_SPACING) if row[5] > max_time_between_stops: self.warnings_container.add_warning( row, WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS) def _validate_speeds_and_trip_times(self): # These are the mode - feasible speed combinations used here: # https://support.google.com/transitpartners/answer/1095482?hl=en gtfs_type_to_max_speed = { route_types.TRAM: 100, route_types.SUBWAY: 150, route_types.RAIL: 300, route_types.BUS: 100, route_types.FERRY: 80, route_types.CABLE_CAR: 50, route_types.GONDOLA: 50, route_types.FUNICULAR: 50, route_types.AIRCRAFT: 1000 } max_trip_time = 7200 # seconds self.gtfs.conn.create_function("find_distance", 4, wgs84_distance) # this query returns the total distance and travel time for each trip calculated for each stop spacing separately rows = self.gtfs.execute_custom_query( 'SELECT ' ' q1.trip_I, ' ' type, ' ' sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, ' ' sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime ' ' FROM ' '(SELECT * FROM stop_times, ' 'stops WHERE stop_times.stop_I = stops.stop_I) q1, ' '(SELECT * FROM stop_times, ' 'stops WHERE stop_times.stop_I = stops.stop_I) q2, trips, routes WHERE q1.trip_I = q2.trip_I ' 'AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I ' ' AND trips.route_I = routes.route_I GROUP BY q1.trip_I' ).fetchall() for row in rows: avg_velocity = row[2] / max(row[3], 1) * 3.6 if avg_velocity > gtfs_type_to_max_speed[row[1]]: self.warnings_container.add_warning( row, WARNING_UNREALISTIC_AVERAGE_SPEED) if row[3] > max_trip_time: self.warnings_container.add_warning(row, WARNING_LONG_TRIP_TIME) def _validate_stop_sequence(self): # this function checks if the stop sequence value is changing with +1 for each stop. This is not (yet) enforced rows = self.gtfs.execute_custom_query( 'SELECT trip_I, dep_time_ds, seq ' 'FROM stop_times ' 'ORDER BY trip_I, dep_time_ds, seq').fetchall() old_trip_id = None for row in rows: new_trip_id = row[0] new_seq = row[2] if old_trip_id == new_trip_id: if old_seq + 1 != new_seq: self.warnings_container.add_warning( row, WARNING_STOP_SEQUENCE_ERROR) old_trip_id = row[0] old_seq = row[2]
class ImportValidator(object): def __init__(self, gtfssource, gtfs): """ Parameters ---------- gtfs_sources: list of strings gtfs: GTFS, or path to a GTFS object A GTFS object """ self.df_freq_dict = {} if isinstance(gtfssource, string_types + (dict, )): self.gtfs_sources = [gtfssource] else: assert isinstance(gtfssource, list) self.gtfs_sources = gtfssource assert len( self.gtfs_sources ) > 0, "There needs to be some source files for validating an import" if not isinstance(gtfs, GTFS): self.gtfs = GTFS(gtfs) else: self.gtfs = gtfs self.location = self.gtfs.get_location_name() self.warnings_container = WarningsContainer() def get_warnings(self): self.warnings_container.clear() self._validate_table_counts() self._validate_no_nulls() self._validate_danglers() self.warnings_container.print_summary() return self.warnings_container def _validate_table_counts(self): """ Imports source .txt files, checks row counts and then compares the rowcounts with the gtfsobject :return: """ for table_name_txt, db_table_name, row_warning in zip( SOURCE_TABLE_NAMES, DB_TABLE_NAMES, ROW_WARNINGS): source_row_count = 0 for gtfs_source in self.gtfs_sources: frequencies_in_source = source_table_txt_to_pandas( gtfs_source, 'frequencies.txt') try: if table_name_txt == 'trips' and not frequencies_in_source.empty: source_row_count += self._frequency_generated_trips( gtfs_source, table_name_txt) elif table_name_txt == 'stop_times' and not frequencies_in_source.empty: source_row_count += self._frequency_generated_stop_times( gtfs_source, table_name_txt) else: df = source_table_txt_to_pandas( gtfs_source, table_name_txt) source_row_count += len(df.index) except (IOError) as e: print(e) pass # Result from GTFSobj: database_row_count = self.gtfs.get_row_count(db_table_name) if source_row_count == database_row_count: print("Row counts match for " + table_name_txt + " between the source and database (" + str(database_row_count) + ")") else: difference = database_row_count - source_row_count print('Row counts do not match for ' + str(table_name_txt) + ': (source=' + str(source_row_count) + ', database=' + str(database_row_count) + ")") if table_name_txt == "calendar" and difference > 0: query = "SELECT count(*) FROM (SELECT * FROM calendar ORDER BY service_I DESC LIMIT " \ + str(int(difference)) + \ ") WHERE start_date=end_date AND m=0 AND t=0 AND w=0 AND th=0 AND f=0 AND s=0 AND su=0" number_of_entries_added_by_calendar_dates_loader = self.gtfs.execute_custom_query( query).fetchone()[0] if number_of_entries_added_by_calendar_dates_loader == difference: print( " But don't worry, the extra entries seem to just dummy entries due to calendar_dates" ) else: print(" Reason for this is unknown.") self.warnings_container.add_warning( self.location, row_warning, difference) else: self.warnings_container.add_warning( self.location, row_warning, difference) def _validate_no_nulls(self): """ Loads the tables from the gtfs object and counts the number of rows that have null values in fields that should not be null. Stores the number of null rows in warnings_container """ for table, null_warning in zip(DB_TABLE_NAMES, NULL_WARNINGS): # TODO: make this validation source by source df = self.gtfs.get_table(table) df.drop(FIELDS_WHERE_NULL_OK[table], inplace=True, axis=1) # print(df.to_string()) len_table = len(df.index) df.dropna(inplace=True, axis=0) len_non_null = len(df.index) nullrows = len_table - len_non_null if nullrows > 0: # print('Warning: Null values detected in table ' + table) self.warnings_container.add_warning(self.location, null_warning, value=nullrows) def _validate_danglers(self): """ Checks for rows that are not referenced in the the tables that should be linked stops <> stop_times using stop_I stop_times <> trips <> days, using trip_I trips <> routes, using route_I :return: """ for query, warning in zip(DANGLER_QUERIES, DANGLER_WARNINGS): dangler_count = self.gtfs.execute_custom_query(query).fetchone()[0] if dangler_count > 0: print(str(dangler_count) + " " + warning) self.warnings_container.add_warning(self.location, warning, value=dangler_count) def _frequency_generated_trips(self, source, txt): """ This function calculates the equivalent rowcounts for trips when taking into account the generated rows in the gtfs object :param source: path to the source file :param txt: txt file in question :return: sum of all trips """ df_freq = source_table_txt_to_pandas(source, u'frequencies.txt') df_trips = source_table_txt_to_pandas(source, txt) df_freq['n_trips'] = df_freq.apply(lambda row: len( range(str_time_to_day_seconds(row['start_time']), str_time_to_day_seconds(row['end_time']), row['headway_secs'] )), axis=1) self.df_freq_dict[source] = df_freq df_trips_freq = pd.merge(df_freq, df_trips, how='outer', on='trip_id') return int(df_trips_freq['n_trips'].fillna(1).sum(axis=0)) def _frequency_generated_stop_times(self, source, txt): """ same as above except for stop times table :param source: :param txt: :return: """ df_stop_times = source_table_txt_to_pandas(source, txt) df_freq = self.df_freq_dict[source] df_stop_freq = pd.merge(df_freq, df_stop_times, how='outer', on='trip_id') return int(df_stop_freq['n_trips'].fillna(1).sum(axis=0))
class ImportValidator(object): def __init__(self, gtfssource, gtfs, verbose=True): """ Parameters ---------- gtfs_sources: list, string, dict list of paths to the strings, or a dictionary directly containing the gtfs data directly gtfs: gtfspy.gtfs.GTFS, or path to a relevant .sqlite GTFS database verbose: bool Whether or not to print warnings on-the-fly. """ if isinstance(gtfssource, string_types + (dict, )): self.gtfs_sources = [gtfssource] else: assert isinstance(gtfssource, list) self.gtfs_sources = gtfssource assert len( self.gtfs_sources ) > 0, "There needs to be some source files for validating an import" if not isinstance(gtfs, GTFS): self.gtfs = GTFS(gtfs) else: self.gtfs = gtfs self.location = self.gtfs.get_location_name() self.warnings_container = WarningsContainer() self.verbose = verbose def validate_and_get_warnings(self): self.warnings_container.clear() self._validate_table_row_counts() self._validate_no_null_values() self._validate_danglers() return self.warnings_container def _validate_table_row_counts(self): """ Imports source .txt files, checks row counts and then compares the rowcounts with the gtfsobject :return: """ for db_table_name in DB_TABLE_NAME_TO_SOURCE_FILE.keys(): table_name_source_file = DB_TABLE_NAME_TO_SOURCE_FILE[ db_table_name] row_warning_str = DB_TABLE_NAME_TO_ROWS_MISSING_WARNING[ db_table_name] # Row count in GTFS object: database_row_count = self.gtfs.get_row_count(db_table_name) # Row counts in source files: source_row_count = 0 for gtfs_source in self.gtfs_sources: frequencies_in_source = source_csv_to_pandas( gtfs_source, 'frequencies.txt') try: if table_name_source_file == 'trips' and not frequencies_in_source.empty: source_row_count += self._frequency_generated_trips_rows( gtfs_source) elif table_name_source_file == 'stop_times' and not frequencies_in_source.empty: source_row_count += self._compute_number_of_frequency_generated_stop_times( gtfs_source) else: df = source_csv_to_pandas(gtfs_source, table_name_source_file) source_row_count += len(df.index) except IOError as e: if hasattr(e, "filename") and db_table_name in e.filename: pass else: raise e if source_row_count == database_row_count and self.verbose: print("Row counts match for " + table_name_source_file + " between the source and database (" + str(database_row_count) + ")") else: difference = database_row_count - source_row_count ('Row counts do not match for ' + str(table_name_source_file) + ': (source=' + str(source_row_count) + ', database=' + str(database_row_count) + ")") if table_name_source_file == "calendar" and difference > 0: query = "SELECT count(*) FROM (SELECT * FROM calendar ORDER BY service_I DESC LIMIT " \ + str(int(difference)) + \ ") WHERE start_date=end_date AND m=0 AND t=0 AND w=0 AND th=0 AND f=0 AND s=0 AND su=0" number_of_entries_added_by_calendar_dates_loader = self.gtfs.execute_custom_query( query).fetchone()[0] if number_of_entries_added_by_calendar_dates_loader == difference and self.verbose: print( " But don't worry, the extra entries seem to just dummy entries due to calendar_dates" ) else: if self.verbose: print(" Reason for this is unknown.") self.warnings_container.add_warning( row_warning_str, self.location, difference) else: self.warnings_container.add_warning( row_warning_str, self.location, difference) def _validate_no_null_values(self): """ Loads the tables from the gtfs object and counts the number of rows that have null values in fields that should not be null. Stores the number of null rows in warnings_container """ for table in DB_TABLE_NAMES: null_not_ok_warning = "Null values in must-have columns in table {table}".format( table=table) null_warn_warning = "Null values in good-to-have columns in table {table}".format( table=table) null_not_ok_fields = DB_TABLE_NAME_TO_FIELDS_WHERE_NULL_NOT_OK[ table] null_warn_fields = DB_TABLE_NAME_TO_FIELDS_WHERE_NULL_OK_BUT_WARN[ table] # CW, TODO: make this validation source by source df = self.gtfs.get_table(table) for warning, fields in zip( [null_not_ok_warning, null_warn_warning], [null_not_ok_fields, null_warn_fields]): null_unwanted_df = df[fields] rows_having_null = null_unwanted_df.isnull().any(1) if sum(rows_having_null) > 0: rows_having_unwanted_null = df[rows_having_null.values] self.warnings_container.add_warning( warning, rows_having_unwanted_null, len(rows_having_unwanted_null)) def _validate_danglers(self): """ Checks for rows that are not referenced in the the tables that should be linked stops <> stop_times using stop_I stop_times <> trips <> days, using trip_I trips <> routes, using route_I :return: """ for query, warning in zip(DANGLER_QUERIES, DANGLER_WARNINGS): dangler_count = self.gtfs.execute_custom_query(query).fetchone()[0] if dangler_count > 0: if self.verbose: print(str(dangler_count) + " " + warning) self.warnings_container.add_warning(warning, self.location, count=dangler_count) def _frequency_generated_trips_rows(self, gtfs_soure_path, return_df_freq=False): """ This function calculates the equivalent rowcounts for trips when taking into account the generated rows in the gtfs object Parameters ---------- gtfs_soure_path: path to the source file param txt: txt file in question :return: sum of all trips """ df_freq = source_csv_to_pandas(gtfs_soure_path, 'frequencies') df_trips = source_csv_to_pandas(gtfs_soure_path, "trips") df_freq['n_trips'] = df_freq.apply(lambda row: len( range(str_time_to_day_seconds(row['start_time']), str_time_to_day_seconds(row['end_time']), row['headway_secs'] )), axis=1) df_trips_freq = pd.merge(df_freq, df_trips, how='outer', on='trip_id') n_freq_generated_trips = int( df_trips_freq['n_trips'].fillna(1).sum(axis=0)) if return_df_freq: return df_trips_freq else: return n_freq_generated_trips def _compute_number_of_frequency_generated_stop_times( self, gtfs_source_path): """ Parameters ---------- Same as for "_frequency_generated_trips_rows" but for stop times table gtfs_source_path: table_name: Return ------ """ df_freq = self._frequency_generated_trips_rows(gtfs_source_path, return_df_freq=True) df_stop_times = source_csv_to_pandas(gtfs_source_path, "stop_times") df_stop_freq = pd.merge(df_freq, df_stop_times, how='outer', on='trip_id') return int(df_stop_freq['n_trips'].fillna(1).sum(axis=0))