def get_representative_feed(file_loc: str,
                            day_type: str = 'busiest') -> ptg.gtfs.Feed:
    """
    Given a filepath, extract a partridge feed object holding a
    representative set of schedule patterns, extracted from the GTFS zip
    file, as a set of pandas DataFrames.

    Parameters
    ----------
    file_loc : str
        The location (filepath) of the GTFS zip file.
    day_type : str
        The name of the type of representative feed desired. Currently,
        only one type is supported, ``'busiest'``. This extracts the
        schedule pattern for the day with the most trips on it.

    Returns
    -------
    feed : ptg.gtfs.Feed
        A partridge feed object, holding related schedule information as
        pandas DataFrames for the busiest day in the available schedule.

    Raises
    ------
    InvalidGTFS
        If partridge raises an ``AssertionError`` while reading the
        per-date service ids / trip counts, or if no trip counts by date
        come back at all.
    NotImplementedError
        If ``day_type`` is anything other than ``'busiest'``.
    """
    # Extract service ids and then trip counts by those dates
    try:
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)
    except AssertionError as err:
        # Raised by partridge if no valid dates are returned. Chain the
        # original assertion so the root cause stays in the traceback.
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.') from err

    # TODO: Due to partridge's assertion error being raised, this
    #       check may no longer be needed.
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select
    # how to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the date that has the most trips associated with it
        selected_date, trip_count = max(trip_counts_by_date.items(),
                                        key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    # Look the service ids up once; they feed both the log line and the
    # partridge view below.
    service_ids = service_ids_by_date[selected_date]
    all_service_ids = '\n\t'.join(service_ids)
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    # Restrict trips.txt to the selected date's service ids when loading.
    feed_query = {'trips.txt': {'service_id': service_ids}}
    return ptg.load_feed(file_loc, view=feed_query)
def get_representative_feed(file_loc: str, day_type: str = 'busiest'):
    """
    Load a partridge feed limited to the service ids of one
    representative date in the GTFS file at ``file_loc``.

    Parameters
    ----------
    file_loc : str
        Path handed to partridge for reading the GTFS data.
    day_type : str
        Strategy used to pick the representative date. Only ``'busiest'``
        is handled: the date with the highest trip count.

    Returns
    -------
    The object returned by ``ptg.feed`` for the selected date's service
    ids.

    Raises
    ------
    InvalidGTFS
        If partridge reports no trip counts by date.
    NotImplementedError
        If ``day_type`` is not ``'busiest'``.
    """
    ids_for_date = ptg.read_service_ids_by_date(file_loc)
    counts_for_date = ptg.read_trip_counts_by_date(file_loc)

    # Nothing to choose from: bail out before trying to pick a date.
    if len(counts_for_date.items()) == 0:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # Only one selection strategy exists so far; reject everything else.
    if day_type != 'busiest':
        raise NotImplementedError('Unsupported day type string supplied.')

    # Pick the (date, count) pair with the largest trip count.
    busiest_pair = max(counts_for_date.items(), key=lambda pair: pair[1])
    selected_date, trip_count = busiest_pair

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    all_service_ids = '\n\t'.join(ids_for_date[selected_date])
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    # Restrict trips.txt to the selected date's service ids when loading.
    view = {'trips.txt': {'service_id': ids_for_date[selected_date]}}

    # NOTE(review): newer partridge releases appear to expose this loader
    # as `load_feed` -- confirm `ptg.feed` exists in the pinned version.
    return ptg.feed(file_loc, view=view)
def test_trip_counts_by_date(path):
    """Check the per-date trip counts partridge reads from ``path``."""
    # Five dates in August 2017 carry 442 trips each ...
    expected = {
        datetime.date(2017, 8, day): 442 for day in (1, 2, 3, 4, 7)
    }
    # ... while the 5th carries exactly one; the 6th has no entry.
    expected[datetime.date(2017, 8, 5)] = 1

    observed = ptg.read_trip_counts_by_date(path)

    assert observed == expected
def get_representative_feed(self, file_loc: str, the_date: str):
    """
    Load a partridge feed limited to the service ids active on a given
    date.

    Parameters
    ----------
    file_loc : str
        Path handed to partridge for reading the GTFS data.
    the_date : str
        The date to select, written as three integers separated by
        ``/`` in year/month/day order.

    Returns
    -------
    The object returned by ``ptg.load_feed`` for that date's service ids.

    Raises
    ------
    ValueError
        If ``the_date`` does not split into exactly three integer parts
        that form a valid calendar date.
    InvalidGTFS
        If partridge reports no trip counts by date.
    """
    # Exactly three "/"-separated integers are required: year, month, day.
    year, month, day = (int(part) for part in the_date.split("/"))
    selected_date = date(year, month, day)

    ids_for_date = ptg.read_service_ids_by_date(file_loc)
    counts_for_date = ptg.read_trip_counts_by_date(file_loc)

    # The trip counts are only used as a sanity check on the feed.
    if len(counts_for_date.items()) == 0:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # Restrict trips.txt to the requested date's service ids; the lookup
    # fails if the feed has no entry for that date.
    view = {'trips.txt': {'service_id': ids_for_date[selected_date]}}
    return ptg.load_feed(file_loc, view=view)