def test_config_must_be_dag():
    config = ptg.config.default_config()
    assert config.has_edge("routes.txt", "trips.txt")

    # Make a cycle
    config.add_edge("trips.txt", "routes.txt")

    path = fixture("amazon-2017-08-06")

    with pytest.raises(ValueError, match="Config must be a DAG"):
        ptg.load_feed(path, config=config)
def read_feed(feed_path: str = None) -> Tuple[nx.DiGraph, Feed]:
    """
    Read GTFS feed from folder and return a config and Partridge Feed object
    """
    config = geo_config()
    config.nodes["shapes.txt"]["required_columns"] = config.nodes["shapes.txt"][
        "required_columns"
    ] + ("A", "B", "LINK_ID")

    try:
        feed = ptg.load_feed(feed_path, config=config)
        TransitNetwork.validate_feed(feed, config)
    except KeyError:
        config = default_config()
        config.nodes["shapes.txt"]["required_columns"] = (
            "shape_id",
            "A",
            "B",
            "LINK_ID",
        )
        WranglerLogger.warning(
            "Reducing data requirements for shapes.txt to: {}".format(
                config.nodes["shapes.txt"]["required_columns"]
            )
        )
        feed = ptg.load_feed(feed_path, config=config)
        TransitNetwork.validate_feed(feed, config)

    # TODO: should be read in as a schema
    WranglerLogger.info(
        "Read %s agencies from %s"
        % (feed.agency.size, os.path.join(feed_path, "agency.txt")))
    WranglerLogger.info(
        "Read %s frequencies from %s"
        % (feed.frequencies.size, os.path.join(feed_path, "frequencies.txt")))
    WranglerLogger.info(
        "Read %s routes from %s"
        % (feed.routes.size, os.path.join(feed_path, "routes.txt")))
    WranglerLogger.info(
        "Read %s shapes from %s"
        % (feed.shapes.size, os.path.join(feed_path, "shapes.txt")))
    WranglerLogger.info(
        "Read %s stops from %s"
        % (feed.stops.size, os.path.join(feed_path, "stops.txt")))
    WranglerLogger.info(
        "Read %s transfers from %s"
        % (feed.transfers.size, os.path.join(feed_path, "transfers.txt")))
    WranglerLogger.info(
        "Read %s trips from %s"
        % (feed.trips.size, os.path.join(feed_path, "trips.txt")))

    return config, feed
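# --- Usage sketch (not part of the original source) ---
# A minimal, hypothetical call to read_feed(); it assumes the surrounding
# module provides geo_config/default_config, WranglerLogger and TransitNetwork,
# and that a GTFS folder exists at the placeholder path below.
config, feed = read_feed("examples/stpaul_transit")  # placeholder folder
print(sorted(config.nodes))   # GTFS files the config graph knows about
print(feed.routes.head())     # routes.txt loaded as a pandas DataFrame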
def test_extract_routes(path):
    fd = ptg.load_feed(path)

    agencies = fd.agency
    assert len(agencies) == 3

    routes = fd.routes
    assert len(routes) == 14

    route_ids = [routes.iloc[0].route_id]
    agency_ids = set(fd.routes[fd.routes.route_id.isin(route_ids)].agency_id)
    trip_ids = set(fd.trips[fd.trips.route_id.isin(route_ids)].trip_id)
    stop_ids = set(fd.stop_times[fd.stop_times.trip_id.isin(trip_ids)].stop_id)

    assert len(agency_ids)
    assert len(trip_ids)
    assert len(stop_ids)

    try:
        tmpdir = tempfile.mkdtemp()
        outfile = os.path.join(tmpdir, "test.zip")

        result = ptg.extract_feed(
            path, outfile, {"trips.txt": {"route_id": route_ids}}
        )
        assert result == outfile

        new_fd = ptg.load_feed(outfile)
        assert list(new_fd.routes.route_id) == route_ids
        assert set(new_fd.agency.agency_id) == agency_ids
        assert set(new_fd.trips.trip_id) == trip_ids
        assert set(new_fd.stop_times.trip_id) == trip_ids
        assert set(new_fd.stops.stop_id) == stop_ids

        nodes = []
        for node in fd._config.nodes():
            df = fd.get(node)
            if not df.empty:
                nodes.append(node)
        assert len(nodes)

        for node in nodes:
            original_df = fd.get(node)
            new_df = new_fd.get(node)
            assert set(original_df.columns) == set(new_df.columns)
    finally:
        shutil.rmtree(tmpdir)
def start_new_evaluation(request):
    active_page = 'evaluate'

    if request.session.get('gtfs_feed', None):
        tmp_dir = request.session['gtfs_feed']
        gtfs_feed = ptg.load_feed(tmp_dir)
        agency_options = gtfs_feed.agency['agency_name'].tolist()
        agency_options = list_to_tuple_of_tuples(agency_options)
        mode_options = list(set(gtfs_feed.routes['route_type'].tolist()))
        mode_options = get_mode_drop_down(mode_options)
        my_new_review_form = NewReviewForm(agency_options=agency_options,
                                           mode_options=mode_options)
    else:
        my_new_review_form = None

    # Only process the POST when a feed has been uploaded; otherwise
    # agency_options/mode_options are undefined.
    if request.POST and my_new_review_form is not None:
        my_new_review_form = NewReviewForm(request.POST,
                                           agency_options=agency_options,
                                           mode_options=mode_options)
        if my_new_review_form.is_valid():
            agency_name = my_new_review_form.cleaned_data['agency']
            mode = my_new_review_form.cleaned_data['mode']
            new_session_gtfs_path, my_review = DataSelector.setup_initial_data_for_review(
                request.session['gtfs_feed'], agency_name, mode)
            request.session['gtfs_feed'] = new_session_gtfs_path
            return redirect(evaluate_feed, review_id=my_review.id)

    return render(request, 'start_new_evaluation.html',
                  {'active_page': active_page,
                   'my_new_review_form': my_new_review_form})
def get_representative_feed(file_loc: str,
                            day_type: str = 'busiest') -> ptg.gtfs.Feed:
    """
    Given a filepath, extract a partridge feed object, holding a \
    representative set of schedule patterns, extracted from the GTFS zip \
    file, as a set of pandas DataFrames.

    Parameters
    ----------
    file_loc : str
        The location (filepath) of the GTFS zip file.
    day_type : str
        The name of the type of representative feed desired. Currently, only \
        one type is supported, busiest. This extracts the schedule pattern \
        for a day that has the most service on it. This is determined by the \
        day with the most trips on it.

    Returns
    -------
    feed : ptg.gtfs.Feed
        A partridge feed object, holding related schedule information as \
        pandas DataFrames for the busiest day in the available schedule.
    """

    # Extract service ids and then trip counts by those dates
    try:
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Raised by partridge if no valid dates returned
    except AssertionError:
        # Make sure we have some valid values returned in trips
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # TODO: Due to partridge's assertion error being raised, this
    #       check may no longer be needed.
    if not len(trip_counts_by_date.items()):
        # Otherwise, error out
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select how
    # to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the service id that has the most trips associated with it
        (selected_date, trip_count) = max(trip_counts_by_date.items(),
                                          key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    all_service_ids = '\n\t'.join(service_ids_by_date[selected_date])
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    sub = service_ids_by_date[selected_date]
    feed_query = {'trips.txt': {'service_id': sub}}

    return ptg.load_feed(file_loc, view=feed_query)
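# --- Usage sketch (not part of the original source) ---
# Hypothetical call to get_representative_feed(); assumes a local GTFS zip at
# the placeholder path and that InvalidGTFS and log are defined as in peartree.
feed = get_representative_feed("data/agency_gtfs.zip", day_type="busiest")
print(len(feed.trips), "trips on the busiest service day")
print(feed.routes.head())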
def read(feed_path: str) -> TransitNetwork:
    """
    Read GTFS feed from folder and create a TransitNetwork object.

    Args:
        feed_path: where to read transit network files from

    Returns:
        a TransitNetwork object.
    """
    config = default_config()
    feed = ptg.load_feed(feed_path, config=config)
    WranglerLogger.info("Read in transit feed from: {}".format(feed_path))

    updated_config = TransitNetwork.validate_feed(feed, config)

    # Read in each feed so we can write over them
    editable_feed = DotDict()
    for node in updated_config.nodes.keys():
        # Load (initiate Partridge's lazy load)
        editable_feed[node.replace(".txt", "")] = feed.get(node)

    transit_network = TransitNetwork(feed=editable_feed, config=updated_config)
    transit_network.feed_path = feed_path
    return transit_network
def DoesFeedLoad(gtfs):
    try:
        feed = ptg.load_feed(gtfs)
        return True
    except Exception as e:
        print(e)
        return False
def HasBlockIDs(gtfs_filename):
    gtfs = ptg.load_feed(gtfs_filename)
    if 'block_id' not in gtfs.trips.columns:
        # block_id column missing
        return False
    if gtfs.trips['block_id'].isna().any():
        # block_id column there but some rows are missing data
        return False
    return True
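# --- Usage sketch (not part of the original source) ---
# Hypothetical pre-flight check combining the two helpers above; the path is a
# placeholder, not part of the original code.
path = "feeds/city_gtfs.zip"
if DoesFeedLoad(path) and HasBlockIDs(path):
    print("Feed loads and every trip has a block_id")
else:
    print("Feed is unusable or block_id data is incomplete")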
def test_extract_agencies(path):
    fd = ptg.load_feed(path)

    agencies = fd.agency
    assert len(agencies) == 3

    routes = fd.routes
    assert len(routes) == 14

    agency_ids = [agencies.iloc[0].agency_id]
    route_ids = set(fd.routes[fd.routes.agency_id.isin(agency_ids)].route_id)
    trip_ids = set(fd.trips[fd.trips.route_id.isin(route_ids)].trip_id)
    stop_ids = set(fd.stop_times[fd.stop_times.trip_id.isin(trip_ids)].stop_id)

    assert len(route_ids)
    assert len(trip_ids)
    assert len(stop_ids)

    with tempfile.TemporaryDirectory() as tmpdir:
        outfile = os.path.join(tmpdir, "test.zip")

        result = ptg.extract_feed(
            path, outfile, {"routes.txt": {"agency_id": agency_ids}}
        )
        assert result == outfile

        new_fd = ptg.load_feed(outfile)
        assert list(new_fd.agency.agency_id) == agency_ids
        assert set(new_fd.routes.route_id) == route_ids
        assert set(new_fd.trips.trip_id) == trip_ids
        assert set(new_fd.stop_times.trip_id) == trip_ids
        assert set(new_fd.stops.stop_id) == stop_ids

        nodes = []
        for node in fd._config.nodes():
            df = fd.get(node)
            if not df.empty:
                nodes.append(node)
        assert len(nodes)

        for node in nodes:
            original_df = fd.get(node)
            new_df = new_fd.get(node)
            assert set(original_df.columns) == set(new_df.columns)
def get_gtfs_feed(network, network_date):
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    Assignment.NETWORK_BUILD_DATE = network_date
    service_ids_by_date = ptg.read_service_ids_by_date(network)
    service_ids = service_ids_by_date[network_date]

    feed = ptg.load_feed(network,
                         config=Util.get_fast_trips_config(),
                         view={
                             'trips.txt': {'service_id': service_ids},
                         })
    return feed
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(zip_path)  # , encoding='utf-8')
    service_ids = service_ids_by_date[date]

    feed = ptg.load_feed(
        zip_path,
        view={
            'trips.txt': {
                'service_id': service_ids,
            },
        },
        # encoding='utf-8'  # CUSTOM VERSION, NOT YET PUSHED
    )
    return feed
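# --- Usage sketch (not part of the original source) ---
# partridge keys read_service_ids_by_date() by datetime.date, so the lookup
# date must be a date object. The path and date below are placeholders.
import datetime
feed = get_partridge_feed_by_date("data/gtfs.zip", datetime.date(2019, 3, 7))
print(feed.trips.shape)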
def read_input_files(self):
    """
    Reads in the input network and demand files and initializes the
    relevant data structures.
    """
    self.performance.record_step_start(0, 0, 0, "read_input_files")

    # Read the gtfs files first
    FastTripsLogger.info("Reading GTFS schedule")

    service_ids_by_date = ptg.read_service_ids_by_date(
        Assignment.INPUT_NETWORK_ARCHIVE)
    service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]

    gtfs_feed = ptg.load_feed(os.path.join(Assignment.INPUT_NETWORK_ARCHIVE),
                              config=Util.get_fast_trips_config(),
                              view={
                                  'trips.txt': {'service_id': service_ids},
                              })

    # Read Stops (gtfs-required)
    self.stops = Stop(Assignment.INPUT_NETWORK_ARCHIVE,
                      Assignment.OUTPUT_DIR,
                      gtfs_feed,
                      Assignment.NETWORK_BUILD_DATE)

    # Read routes, agencies, fares
    self.routes = Route(Assignment.INPUT_NETWORK_ARCHIVE,
                        Assignment.OUTPUT_DIR,
                        gtfs_feed,
                        Assignment.NETWORK_BUILD_DATE,
                        self.stops)

    # Read Transfers
    self.transfers = Transfer(Assignment.INPUT_NETWORK_ARCHIVE,
                              Assignment.OUTPUT_DIR,
                              gtfs_feed)

    # Read trips, vehicles, calendar and stoptimes
    self.trips = Trip(Assignment.INPUT_NETWORK_ARCHIVE,
                      Assignment.OUTPUT_DIR,
                      gtfs_feed,
                      Assignment.NETWORK_BUILD_DATE,
                      self.stops,
                      self.routes,
                      Assignment.PREPEND_ROUTE_ID_TO_TRIP_ID)

    # Read the TAZs into a TAZ instance
    self.tazs = TAZ(Assignment.OUTPUT_DIR,
                    gtfs_feed,
                    Assignment.NETWORK_BUILD_DATE,
                    self.stops,
                    self.transfers,
                    self.routes)

    # Read the demand into passenger_id -> passenger instances
    self.passengers = Passenger(Assignment.INPUT_DEMAND_DIR,
                                Assignment.OUTPUT_DIR,
                                Assignment.NETWORK_BUILD_DATE,
                                self.stops,
                                self.routes,
                                Assignment.CAPACITY_CONSTRAINT)
def read_gtfs(gtfs_feed_dir: str, parameters: dict = {}):
    """
    Reads GTFS files from a directory and returns a StandardTransit instance.

    Args:
        gtfs_feed_dir: location of the GTFS files
        parameters (Optional): Dictionary of parameter settings. If not
            provided, will use default parameters.

    Returns:
        StandardTransit instance
    """
    return StandardTransit(ptg.load_feed(gtfs_feed_dir), parameters=parameters)
def get_representative_feed(self, file_loc: str, the_date: str):
    year, month, day = map(int, the_date.split("/"))
    selected_date = date(year, month, day)

    # Extract service ids and then trip counts by those dates
    service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
    trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Make sure we have some valid values returned in trips
    if not len(trip_counts_by_date.items()):
        # Otherwise, error out
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    sub = service_ids_by_date[selected_date]
    feed_query = {'trips.txt': {'service_id': sub}}

    feeds = ptg.load_feed(file_loc, view=feed_query)
    return feeds
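# --- Usage sketch (not part of the original source) ---
# Hypothetical call; the date string is parsed as year/month/day, and
# FeedLoader stands in for whatever class actually owns this method.
loader = FeedLoader()
feed = loader.get_representative_feed("data/gtfs.zip", "2020/01/15")
print(len(feed.trips), "trips scheduled on 2020-01-15")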
def read(feed_path: str, fast: bool = False) -> TransitNetwork:
    """
    Read GTFS feed from folder and create a TransitNetwork object.
    """
    config = default_config()
    feed = ptg.load_feed(feed_path, config=config)
    WranglerLogger.info("Read in transit feed from: {}".format(feed_path))

    updated_config = TransitNetwork.validate_feed(feed, config)

    # Read in each feed so we can write over them
    new_feed = DotDict()
    for node in updated_config.nodes.keys():
        # Load (initiate Partridge's lazy load)
        new_feed[node.replace(".txt", "")] = feed.get(node)

    transit_network = TransitNetwork(feed=new_feed, config=updated_config)
    transit_network.feed_path = feed_path
    return transit_network
def validate_skip_result(self, request, current_result_id):
    '''This method validates that you may replace a result in the review.
    It returns True or False and a request with an error or success message.'''
    target_result = get_object_or_404(result, id=current_result_id)
    gtfs_feed = ptg.load_feed(request.session['gtfs_feed'])

    if not self.__gtfs_feed_matches_result(gtfs_feed, target_result):
        messages.error(
            request,
            'Your active GTFS feed does not appear to match the review you are '
            'working on. You may no longer skip an item')
        return False, request

    if not self.__check_all_gtfs_rows_are_not_selected(gtfs_feed, target_result):
        messages.warning(
            request,
            "You can not skip any items in this category, all items in the feed "
            "have been selected for review.")
        return False, request

    return True, request
def post_gtfs_zip(request):
    """This view is a post request for saving GTFS zip files to a temp folder
    for use later."""
    if not request.method == 'POST' or not request.FILES:
        return HttpResponse('You must submit a .zip file', status=400)
    else:
        form = GtfsZipForm(request.POST, request.FILES)
        if form.is_valid():
            try:
                # TODO: implement better file management
                tmp_dir = tempfile.mkdtemp()
                zip_ref = zipfile.ZipFile(request.FILES['file'], 'r')
                zip_ref.extractall(tmp_dir)
                gtfs_feed = ptg.load_feed(tmp_dir)
                request.session['gtfs_feed'] = tmp_dir
                messages.success(
                    request,
                    "Your GTFS file has been successfully uploaded and parsed!")
            except Exception:
                messages.error(
                    request,
                    'There was an error uploading your GTFS feed. Please be sure '
                    'you submitted a valid .zip GTFS file and try again.')
        else:
            messages.error(
                request,
                'There was an error uploading your GTFS feed. Please be sure '
                'you submitted a valid .zip GTFS file and try again.')
    return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
def HasBusRoutes(gtfs_filename):
    # Check to see if the feed contains any buses
    feed = ptg.load_feed(gtfs_filename)
    return feed.routes.route_type.isin(route_types).any()
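# --- Usage sketch (not part of the original source) ---
# route_types is not defined in the snippet above; it is presumably a
# module-level list of GTFS route_type codes. A plausible definition for "bus"
# is shown here (GTFS route_type 3), covering both int and str parses.
route_types = [3, '3']
if HasBusRoutes("data/gtfs.zip"):  # placeholder path
    print("Feed contains at least one bus route")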
                           v, length=0, duration=0, mode='hopoff')
    my_routes_g.add_edge(u_name, v_name, length=d['length'],
                         duration=d['duration'], mode='bus')
    my_routes_g.remove_edge(u, v)

gtfs_routes_g = copy.deepcopy(road_g)
feed_query = {'routes.txt': {'route_id': route_id}}
feed = ptg.load_feed(gtfs_db_dir + gtfs, view=feed_query)

for i, trip in feed.trips.iterrows():
    trip_id = trip['trip_id']
    trip_stops = feed.stop_times.loc[feed.stop_times['trip_id'] == trip_id]
    trip_stops = trip_stops.sort_values('stop_sequence')
    trip_stops = trip_stops.merge(feed.stops, how='left', on='stop_id')
    trip_stops_coords = list(zip(trip_stops.stop_lon, trip_stops.stop_lat))

    if len(trip_stops_coords) > 1:
        # Add edges between stops
        for stop1, stop2 in zip(trip_stops_coords[:-1], trip_stops_coords[1:]):
            stop1_node = ox.utils.get_nearest_node(road_g, (stop1[1], stop1[0]))
def select_new_item_for_review(self, gtfs_feed, current_result_id):
    '''This method will replace the specified current result with a new one
    from the gtfs_feed'''
    target_result = get_object_or_404(result, id=current_result_id)
    gtfs_feed = ptg.load_feed(gtfs_feed)
import partridge as ptg

inpath = 'inputs/SF_gtfs.zip'

# future: get the GTFS files from S3 (by default do it for all agencies)

# open them in partridge
_date, service_ids = ptg.read_busiest_date(inpath)
view = {
    'trips.txt': {'service_id': service_ids},
}
feed = ptg.load_feed(inpath, view)

# We define a VisualTrip as a trip, but with the unique ID being trip_headsign,
# or route_short_name + ' ' + trip_headsign if the headsign does not contain
# the route name in it (varies between agencies).
# Each VisualTrip has a list of trip_ids,
# but one shape, which is the result of merging the shapes from each trip,
# and one list of stop_times, which are joined together.
# This is done to address cases like Muni's 38 Geary, which goes to Lands End
# or V.A. Hospital, but both go to SF Transit Center inbound,
# or like the TTC's 25 Don Mills, where the 25B and 25C branches have no
# overlap (it's a split of the route).

routes = {route.route_id: route for route in feed.routes.itertuples()}

visual_trips = {}

# aggregate VisualTrips
for trip in feed.trips.itertuples():
    route = routes[trip.route_id]
    route_short_name = route.route_short_name or route.route_long_name
    trip_headsign = trip.trip_headsign
    visual_trip_key = (trip_headsign if route_short_name in trip_headsign
                       else route_short_name + ' ' + trip_headsign)
    visual_trip = visual_trips.get(visual_trip_key, {
        'trip_ids': [],
        'shape': {},
def test_load_feed():
    feed = ptg.load_feed(fixture("amazon-2017-08-06"))
    assert feed.stop_times.dtypes["stop_id"] == np.object
    assert feed.stop_times.dtypes["stop_sequence"] == np.int64
    assert feed.stop_times.dtypes["arrival_time"] == np.float64
from pathlib import Path
import os

import pandas as pd
import peartree as pt
import gtfstk
import partridge as ptg

real_gtfs_dir = Path('../../data/gtfs/cleaned_undefined_zombies')
gen_gtfs_dir = Path('../../output/gtfs/')
imgs_info = pd.read_csv('../../data/route_imgs_256/imgs_info.csv')

# ----------LOOP----------- #
# Loop for each generated gtfs since the generated are a subset of the cleaned
gen_filepath = gen_gtfs_dir / '1.zip'
# gen_filename is the filename without the extension
gen_filename = gen_filepath.stem

# Get the row with the info about the original gtfs the gen came from
img_info = imgs_info.loc[imgs_info['img'] == gen_filename + '.jpg']
real_filepath = real_gtfs_dir / img_info['gtfs'].values[0]
route_id = img_info['route_id'].values[0]

feed_query = {'routes.txt': {'route_id': route_id}}
gen_feed = ptg.load_feed(str(real_filepath), view=feed_query)

start = 7 * 60 * 60
end = 9 * 60 * 60
G = pt.load_feed_as_graph(gen_feed, start, end)

print('deb')
def test_missing_dir():
    with pytest.raises(ValueError, match="File or path not found"):
        ptg.load_feed(fixture("missing"))
def generate_shapes(gtfs_inpath): turn_penalty_factor = 100000 # Penalizes turns in Valhalla routes. Range 0 - 100,000. stop_radius = 35 # Radius used to search when matching stop coordinates (meters) intermediate_radius = 100 # Radius used to search when matching intermediate coordinates (meters) stop_distance_threshold = 1000 # Stop-to-stop distance threshold for including intermediate coordinates (meters) maneuver_penalty = 43200 # Penalty when a route includes a change from one road to another (seconds). Range 0 - 43,200. # Initialize Valhalla input dictionary with some empty values point_parameters = { 'lon': None, 'lat': None, 'type': None, 'radius': None, 'rank_candidates': 'true', 'preferred_side': 'same', 'node_snap_tolerance': 0, 'street_side_tolerance': 0 } request_parameters = { 'shape': None, 'costing': 'bus', 'shape_match': 'map_snap', 'filters': { 'attributes': ['edge.id', 'edge.length', 'shape'], 'action': 'include' }, 'costing_options': { 'bus': { 'maneuver_penalty': maneuver_penalty } }, 'trace_options.turn_penalty_factor': turn_penalty_factor } """ -------------Objects------------- """ class Pattern: # Attributes for each unique pattern of stops that create one or more route variant def __init__(self, route, direction, stops, trips, stop_coords, shape, timepoints): self.route = route self.direction = direction self.stops = stops self.shape = shape self.trips = trips self.timepoints = timepoints self.stop_coords = stop_coords self.shape_coords = 0 self.v_input = 0 self.coord_types = 0 self.radii = 0 class Segment: # Attributes for each segment which make up a pattern def __init__(self, geometry, distance): self.geometry = geometry self.distance = distance class Corridor: # Attributes for each corridor def __init__(self, edges, segments): self.edges = edges self.segments = segments self.passenger_shared = [] self.stop_shared = [] def get_edges(self): return self.edges def get_segments(self): return self.segments def get_pass_shared(self): return self.passenger_shared def get_stop_shared(self): return self.stop_shared """ -------------Functions------------- """ # Function to get distance (in m) from a pair of lat, long coord tuples def get_distance(start, end): R = 6372800 # earth radius in m lat1, lon1 = start lat2, lon2 = end phi1, phi2 = math.radians(lat1), math.radians(lat2) dphi = math.radians(lat2 - lat1) dlambda = math.radians(lon2 - lon1) a = math.sin(dphi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin( dlambda / 2)**2 return round(2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a)), 0) """ Takes a set of route coordinates and bus stop coordinates, then finds the route coordinate pair that is closest to each bus stop. Returns an array of strings of the same length as the trip coordinate input, with 'break_through' for coordinates at bus stops and 'through' for other coordinates. 
""" def locate_stops_in_shapes(shape_coords, stop_coords, stop_radius, intermediate_radius): coordinate_types = [1] * len(stop_coords) radii = [stop_radius] * len(stop_coords) stop_indices = [0] * len(stop_coords) shape_coord_list = shape_coords.values.tolist() last_stop = 0 #count = 1 coordinate_list = [] shape_line = LineString([ Point(x, y) for x, y in zip(shape_coords.shape_pt_lon, shape_coords.shape_pt_lat) ]) # Get index of point closest to each bus stop for stop_number, stop in enumerate(stop_coords): stop_point = Point(stop[1], stop[0]) new_stop = nearest_points( shape_line, stop_point)[0] # index 0 is nearest point on the line coordinate_list.append((new_stop.y, new_stop.x)) benchmark = 10**9 index = 0 best_index = 0 for point in shape_coord_list[ last_stop:]: # Ensure stops occur sequentially test_dist = get_distance(point, stop) if test_dist + 2 < benchmark: # Add 2m to ensure that loop routes don't the later stop benchmark = test_dist best_index = index + last_stop index += 1 stop_indices[stop_number] = best_index last_stop = best_index + 1 #print("Stop #", count, "; Best Index:", best_index) #count += 1 added_stop_count = 0 # Add intermediate coordinates if stops are far apart for stop_number in range(len(stop_coords) - 1): current_stop = stop_coords[stop_number] next_stop = stop_coords[stop_number + 1] current_pos = stop_indices[stop_number] next_pos = stop_indices[stop_number + 1] distance = get_distance(current_stop, next_stop) if distance > stop_distance_threshold: coords_to_add = math.floor(distance / stop_distance_threshold) num_available_coords = next_pos - current_pos interval = int(num_available_coords / (coords_to_add + 1)) # If there aren't enough available coords to fill the shape, just add all coords if coords_to_add > num_available_coords: for new_coord in range(num_available_coords): coordinate_list.insert( stop_number + 1 + added_stop_count, shape_coord_list[current_pos + new_coord]) coordinate_types.insert( stop_number + 1 + added_stop_count, 0) radii.insert(stop_number + 1 + added_stop_count, intermediate_radius) added_stop_count += 1 else: for new_coord in range(coords_to_add): coordinate_list.insert( stop_number + 1 + added_stop_count, shape_coord_list[current_pos + (interval * new_coord)]) coordinate_types.insert( stop_number + 1 + added_stop_count, 0) radii.insert(stop_number + 1 + added_stop_count, intermediate_radius) added_stop_count += 1 return coordinate_types, coordinate_list, radii # Create index for each pattern def get_pattern_index(patterns): patterns = patterns.sort_values( by=['route_id', 'direction_id', 'count'], ascending=[True, True, False]) prev_dir = 0 prev_route = 0 index = [] for pattern in patterns.values.tolist(): route = pattern[2] direction = pattern[4] if route != prev_route or direction != prev_dir: pattern_count = 1 else: pattern_count += 1 index.append( str(route) + '-' + str(direction) + '-' + str(pattern_count)) prev_dir = direction prev_route = route patterns['pattern_index'] = index return patterns def get_skipped_segments(coords, request_data): # If request times out, try twice more and then raise an error to_count = 1 while to_count < 4: try: # Use Valhalla map matching engine to snap shapes to the road network request_data['shape'] = coords req = requests.post('http://localhost:8002/trace_attributes', data=json.dumps(request_data), timeout=100) to_count = 10 except: print("Timeout #", to_count) to_count += 1 if to_count == 4: raise Exception('Request timed out 3x') # Extract Valhalla response return req.json() def 
store_geometry_and_distance(result, leg): geometry = result['trip']['legs'][leg]['shape'] distance = result['trip']['legs'][leg]['summary']['length'] return Segment(geometry, distance) def match_segs_to_edges(pair_list, pair_dict, request_parameters): cm_count = 0 start_time = time.time() for pair in pair_list: geometry = pair[1] pair_index = pair[0] if pair_index in pair_dict: # If edges already identified, skip cm_count += 1 continue else: # If request times out, try twice more and then raise an error to_count = 1 while to_count < 4: try: # Use Valhalla map matching engine to snap shapes to the road network request_data = request_parameters.copy() request_data['shape'] = geometry req = requests.post( 'http://localhost:8002/trace_attributes', data=json.dumps(request_data), timeout=100) to_count = 10 except: print("Timeout #", to_count) to_count += 1 if to_count == 4: raise Exception('Request ', cm_count, ' timed out 3x') # Extract Valhalla response and store as pair object attribute result = req.json() edges = [] for edge in result['edges']: edges.append(edge['id']) pair_dict[pair_index] = edges cm_count += 1 if cm_count % 100 == 0: elapsed_time = time.time() - start_time print(cm_count, "of", len(pair_list), "edge ids identified.", "Elapsed time:", round(elapsed_time, 0)) start_time = time.time() return pair_dict """ -------------Main Program------------- """ # Import GTFS feed and filter down to normal bus routes only route_type = ['3'] route_desc = ['Key Bus', 'Commuter Bus', 'Local Bus'] view = {'routes.txt': {'route_type': route_type, 'route_desc': route_desc}} feed = ptg.load_feed(gtfs_inpath, view) # Check if shapes.txt exists in GTFS feed try: feed_shapes = feed.shapes[['shape_id', 'shape_pt_lat', 'shape_pt_lon']] has_shapes = True except: has_shapes = False # Check if timepoints included in GTFS feed try: feed_stop_events = feed.stop_times[[ 'trip_id', 'stop_id', 'stop_sequence', 'checkpoint_id' ]] has_timepoints = True except: feed_stop_events = feed.stop_times[[ 'trip_id', 'stop_id', 'stop_sequence' ]] has_timepoints = False # Get relevant tables from GTFS feed: trips, routes and stop sequences feed_trips = feed.trips[['route_id', 'trip_id', 'direction_id']] all_stops = pd.merge(feed_trips, feed_stop_events, on='trip_id', how='inner') all_stops = all_stops.sort_values(by=['trip_id', 'stop_sequence']) stops_dict = all_stops.groupby('trip_id')['stop_id'].agg(list).to_dict() # Get timepoints and change timepoints from binary to increasing count if has_timepoints == True: tp_dict = all_stops.groupby('trip_id')['checkpoint_id'].agg( list).to_dict() for trip in tp_dict: tp_list = tp_dict[trip] new_list = [] tp_count = 0 for stop in tp_list: if type(stop) == str: tp_count += 1 new_list.append(tp_count) tp_dict[trip] = new_list else: # Enter zeros tp_dict = {} for trip in stops_dict: tp_dict[trip] = [0] * len(stops_dict[trip]) # Get coordinates for each stop from gtfs feed_stops = feed.stops[['stop_id', 'stop_lat', 'stop_lon']].copy() stop_coordinates = list(zip(feed_stops.stop_lat, feed_stops.stop_lon)) feed_stops['coords'] = stop_coordinates.copy() feed_stops = feed_stops[['stop_id', 'coords']] stop_df = pd.merge(all_stops, feed_stops, on='stop_id', how='inner') stop_df = stop_df.sort_values(by=['trip_id', 'stop_sequence']) coords_dict = stop_df.groupby('trip_id')['coords'].agg(list).to_dict() # Find the unique sequences of stops (patterns) hash_list = list(stops_dict.values()) hashes = [] for sequence in hash_list: # hashing function for the coordinates so that they can be compared 
new_hash = 0 count = 1 for stop in sequence: try: num = int(stop) except: num = sum([ord(x) for x in stop]) new_hash += (2 * count)**2 + num**3 # Arbitrary hashing function count += 1 hashes.append(new_hash) all_trips = feed_trips.sort_values(by='trip_id') all_trips['hash'] = hashes # Count how many times each route-hash combination appears pattern_counts = all_trips.groupby(['route_id', 'hash', 'direction_id' ]).size().reset_index(name='count') # Get the trip_ids associated with each route-hash combination as a list of lists trip_dict = all_trips.groupby(['route_id', 'hash'])['trip_id'].agg(list).to_dict() # Create a dataframe for the patterns with route_ids, direction, count and representative trip id all_trips = all_trips.drop_duplicates( subset=['route_id', 'hash', 'direction_id']) pattern_counts = pd.merge(pattern_counts[['count', 'hash', 'route_id']], all_trips, on=['hash', 'route_id'], how='inner') pattern_counts = get_pattern_index(pattern_counts) # Create dict of Pattern objects pattern_list = pattern_counts['pattern_index'].values.tolist() pattern_dict = {} shape_dict = {} if has_shapes == True: trip_shapes = feed.trips[['trip_id', 'shape_id']] trip_shapes = trip_shapes[trip_shapes['trip_id'].isin( pattern_counts['trip_id'])] shape_dict = dict(zip(trip_shapes['trip_id'], trip_shapes['shape_id'])) for pattern in pattern_list: pattern_data = pattern_counts.loc[pattern_counts['pattern_index'] == pattern].values.tolist()[0] index = pattern route = pattern_data[2] direction = pattern_data[4] trip_id = pattern_data[3] pattern_hash = pattern_data[1] stops = stops_dict[trip_id] coords = coords_dict[trip_id] trips = trip_dict[(route, pattern_hash)] timepoints = tp_dict[trip_id] if len(shape_dict) > 0: shape = shape_dict[str(trip_id)] else: shape = 0 pattern_dict[index] = Pattern(route, direction, stops, trips, coords, shape, timepoints) # If there are no shapes in GTFS, default to the stop coordinates if has_shapes == False: for pattern in pattern_list: stop_coords = pattern_dict[pattern].stop_coords coord_json = [] for stop in stop_coords: input_data = point_parameters.copy() input_data['lon'] = stop[1] input_data['lat'] = stop[0] input_data['type'] = 'break_through' input_data['radius'] = stop_radius coord_json.append(input_data) pattern_dict[pattern].v_input = coord_json pattern_dict[pattern].coord_types = [1] * len(stop_coords) # Otherwise, include some coordinate points between each pair of stops if stops are far apart else: feed_shapes = feed.shapes[['shape_id', 'shape_pt_lat', 'shape_pt_lon']] count = 0 for pattern in pattern_list: shape = pattern_dict[pattern].shape stop_coords = pattern_dict[pattern].stop_coords shape_coords = feed_shapes.loc[feed_shapes['shape_id'] == shape][[ 'shape_pt_lat', 'shape_pt_lon' ]] coordinate_type, coordinate_list, radii = locate_stops_in_shapes( shape_coords, stop_coords, stop_radius, intermediate_radius) pattern_dict[pattern].coord_types = coordinate_type pattern_dict[pattern].radii = radii # Unrelated, but we'll need this dictionary later pattern_dict[pattern].shape_coords = coordinate_list count += 1 if count % 100 == 0: print('Coordinates prepared for', count, 'of', len(pattern_list), 'patterns') # Check that the number of 'break's is equal to number of stops in the pattern for pattern in pattern_list: coord_types = pattern_dict[pattern].coord_types radii = pattern_dict[pattern].radii num_stops = len(pattern_dict[pattern].stops) num_breaks = coord_types.count(1) if num_breaks - num_stops != 0: print("Error: Breaks - Stops =", num_breaks - 
num_stops, "for Pattern", pattern) coords = pattern_dict[pattern].shape_coords coord_list = [] point_count = 0 for point in coords: if coord_types[point_count]: point_type = 'break_through' else: point_type = 'through' input_data = point_parameters.copy() input_data['lon'] = point[1] input_data['lat'] = point[0] input_data['type'] = point_type input_data['radius'] = radii[point_count] coord_list.append(input_data) point_count += 1 pattern_dict[pattern].v_input = coord_list # Use map matching to convert the GTFS polylines to matched, encoded polylines mm_count = 0 segment_dict = {} skipped_segs = {} start_time = time.time() for pattern in pattern_list: coords = pattern_dict[pattern].v_input coordinate_types = pattern_dict[pattern].coord_types pattern_segs = len(pattern_dict[pattern].stops) - 1 pattern_legs = 0 start_point = 0 # Send multiple requests to Valhalla if the response is cut off while pattern_legs < pattern_segs: # If request times out, try twice more and then raise an error to_count = 1 while to_count < 6: try: # Use Valhalla map matching engine to snap shapes to the road network request_data = request_parameters.copy() request_data['shape'] = coords[start_point:] req = requests.post('http://localhost:8002/trace_route', data=json.dumps(request_data), timeout=60) to_count = 10 except: print("Timeout #", to_count) to_count += 1 if to_count == 6: # Add all segments to skipped_segments coords = pattern_dict[pattern].v_input input_points = [ i - start_point for i, x in enumerate(coordinate_types) if (x == 1 and i >= start_point) ] for point_idx, point in enumerate(input_points[:-1]): skipped_segs[( pattern, point_idx)] = coords[point:input_points[point_idx + 1]] break if to_count == 6: mm_count += 1 break # Extract encoded polyline from Valhalla response result = req.json() try: result_legs = len(result['trip']['legs']) except: # Assume timeout caused by high turn penalty - temporarily set lower for coord in coords: radius = int(coord['radius']) coord['radius'] = str(radius + 10) if radius > 500: raise Exception('No path found') continue # Check that the result 'matched points' match the input break points matched_points = [ location['original_index'] for location in result['trip']['locations'] ] input_points = [ i - start_point for i, x in enumerate(coordinate_types) if (x == 1 and i >= start_point) ] # If no points were matched, skip to the next one if len(matched_points) == 0: last_point = input_points[0] + start_point start_point += input_points[1] skipped_segs[(pattern, pattern_legs)] = coords[last_point:start_point + 1] pattern_legs += 1 continue internal_missed = [] # If they are not identical, there are 2 possible cases: # 1) Break points were skipped and 2) Response stopped short if matched_points != input_points: # Get missing points missing = np.setdiff1d(input_points, matched_points) # If the first coord is missing, skip first segment (2) if np.any(missing == 0): last_point = input_points[0] + start_point start_point += input_points[1] skipped_segs[( pattern, pattern_legs)] = coords[last_point:start_point + 1] pattern_legs += 1 continue # If some inputs were skipped over (1) if min(missing) < max(matched_points): # Get skipped inputs internal_missed = [ i for i in missing if i < max(matched_points) ] previous_match = 0 skip_count = 0 for missed_point in internal_missed: input_index = input_points.index(missed_point) previous_input = input_points[input_index - 1] next_input = input_points[input_index + 1] # Add segments on both sides of skipped stop to skipped list 
skipped_segs[( pattern, pattern_legs + input_index - 1)] = coords[previous_input:missed_point + 1] skipped_segs[( pattern, pattern_legs + input_index)] = coords[missed_point:next_input + 1] # Find leg before skipped point last_good_match = max([ matched_points.index(i) for i in input_points[:input_index] if i in matched_points ]) next_good_match = min([ matched_points.index(i) for i in input_points[input_index:] if i in matched_points ]) # Store geometry, distance for segments preceding skipped point for leg in range(previous_match, last_good_match): segment_dict[( pattern, pattern_legs + leg + skip_count)] = store_geometry_and_distance( result, leg) skip_count += 1 previous_match = next_good_match rem_count = 0 # Store geometry, distance for segments after last skipped point for leg in range(next_good_match, result_legs): segment_dict[( pattern, input_index + pattern_legs + 1 + rem_count)] = store_geometry_and_distance( result, leg) rem_count += 1 # Start next matching at latest matched point start_point += max( [i for i in input_points if i in matched_points]) # If all missing inputs are after last matched point (2) elif len(missing) > 0 and min(missing) > max(matched_points): # Next request should start from first missing point prev_stop = input_points[input_points.index(min(missing)) - 1] + start_point start_point += min(missing) # Determine whether cutoff happened at a stop or in between if max(matched_points) not in input_points: del_last_seg = 1 else: del_last_seg = 0 # Add segment between last matched point and missing point to skip list skipped_segs[( pattern, pattern_legs + result_legs - del_last_seg)] = coords[prev_stop:start_point + 1] # Store geometry, distance for segments preceding skipped point for leg in range(result_legs - del_last_seg): segment_dict[(pattern, pattern_legs + leg)] = store_geometry_and_distance( result, leg) # If we keep last segment, we need to add 1 to pattern legs pattern_legs += (1 - del_last_seg) # Store distance and geometry else: for leg in range(result_legs): segment_dict[(pattern, pattern_legs + leg)] = store_geometry_and_distance( result, leg) pattern_legs += result_legs + len(internal_missed) mm_count += 1 if mm_count % 100 == 0: elapsed_time = time.time() - start_time print(mm_count, "of", len(pattern_list), "patterns snapped to road network.", "Elapsed time:", round(elapsed_time, 0)) start_time = time.time() # Run a check that all segments are either in the matched segments or skipped segments for pattern in pattern_list: pattern_segs = len(pattern_dict[pattern].stops) - 1 for segment in range(pattern_segs): if (pattern, segment) not in segment_dict and ( pattern, segment) not in skipped_segs: print("Error: Pattern " + pattern + ", Seg " + str(segment) + " not assigned.") # Run a check that the number of segments in each pattern is less than (#stops - 1) for key in segment_dict: pattern = key[0] segment = key[1] if segment > len(pattern_dict[pattern].stops) - 1: print("Error: Too many segments assigned to pattern " + pattern) # Run the skipped shapes through trace_attributes to get shapes and distance pair_dict = {} pair_geom = {} for seg in skipped_segs: pattern = seg[0] sequence = seg[1] pair = tuple(pattern_dict[pattern].stops[sequence:sequence + 2]) # If this pair has already been matched as part of another pattern if pair in pair_geom: segment_dict[seg] = Segment(pair_geom[pair][0], pair_geom[pair][1]) continue coords = skipped_segs[seg].copy() result = get_skipped_segments(coords, request_parameters) no_match = False while len(result) == 
4: for coord in coords: point_radius = coord['radius'] coord['radius'] = point_radius + 10 result = get_skipped_segments(coords, request_parameters) if point_radius > 150: no_match = True break if no_match: continue seg_length = 0 edge_ids = [] for edge in result['edges']: seg_length += edge['length'] edge_ids.append(edge['id']) segment_dict[seg] = Segment(result['shape'], seg_length) # Store edge ids to avoid any duplicate requests pair_geom[pair] = [result['shape'], seg_length] pair_dict[pair] = edge_ids # Construct a dataframe sorted by pattern, sequence with encoded polylines route_dict = {} used_route_pairs = set() df_route = [] df_pair = [] df_dir = [] df_pattern = [] df_dist = [] df_index = [] df_tp = [] df_encodedline = [] for pattern in pattern_list: route = pattern_dict[pattern].route direction = pattern_dict[pattern].direction stops = pattern_dict[pattern].stops timepoints = pattern_dict[pattern].timepoints for stop in range(len(stops) - 1): pair = (stops[stop], stops[stop + 1]) tp = timepoints[stop] if (pair + (route, )) not in used_route_pairs: df_route.append(route) df_pair.append(pair) df_dir.append(direction) df_pattern.append(pattern) df_encodedline.append(segment_dict[(pattern, stop)].geometry) df_dist.append(segment_dict[(pattern, stop)].distance) df_index.append( str(route) + '-' + str(pair[0]) + '-' + str(pair[1])) df_tp.append(str(route) + '-' + str(tp)) used_route_pairs.add((pair + (route, ))) if pair in route_dict: route_dict[pair].append(route) else: route_dict[pair] = list([route]) return pd.DataFrame(list( zip(df_route, df_pair, df_dir, df_pattern, df_dist, df_index, df_encodedline)), columns=[ 'route_id', 'stop_pair', 'direction', 'pattern', 'distance', 'seg_index', 'geometry' ])
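# --- Usage sketch (not part of the original source) ---
# Hypothetical driver for generate_shapes(); it assumes a Valhalla map-matching
# server is running on localhost:8002 and that a GTFS zip exists at the
# placeholder path. The function returns a DataFrame of stop-to-stop segments.
segments_df = generate_shapes("data/mbta_gtfs.zip")
print(segments_df[["route_id", "stop_pair", "distance"]].head())
segments_df.to_csv("segments.csv", index=False)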
def setup_initial_data_for_review(gtfs_feed_zip_file, agency, mode):
    '''This method will select the initial set of data that will be reviewed
    from the provided GTFS zip file'''
    my_review = review.objects.create(agency=agency, mode=mode)

    view = {
        'agency.txt': {'agency_name': agency},
        'routes.txt': {'route_type': mode},
    }
    new_tmp_dir = tempfile.mkdtemp()
    outpath = new_tmp_dir
    ptg.extract_feed(gtfs_feed_zip_file, outpath + "view.zip", view)
    gtfs_feed = ptg.load_feed(gtfs_feed_zip_file)
    new_session_gtfs_path = outpath + "view.zip"

    for category in review_category.objects.all():
        target_field_name = category.gtfs_field.name
        target_table = category.gtfs_field.table
        has_related_field_same_table = category.review_widget.has_related_field_same_table
        has_related_field_other_table = category.review_widget.has_related_field_other_table

        ptg_target_table = getattr(gtfs_feed, target_table.replace('.txt', ''))
        total_table_rows = ptg_target_table.shape[0]

        ds = data_selector_factory(category.data_selector)
        number_to_sample = ds.select_row_sample_count(total_table_rows)
        random_sample = ptg_target_table.sample(n=number_to_sample)

        reviewed_data_pk_name = get_table_primary_key(target_table)

        for index, row in random_sample.iterrows():
            try:
                reviewed_data = row[target_field_name]
            except KeyError:
                reviewed_data = "[blank]"
            try:
                reviewed_data_pk_value = row[reviewed_data_pk_name]
            except KeyError:
                reviewed_data_pk_value = None

            this_result = result.objects.create(
                review=my_review,
                review_category=category,
                reviewed_data=reviewed_data,
                reviewed_data_pk_name=reviewed_data_pk_name,
                reviewed_data_pk_value=reviewed_data_pk_value)

            if has_related_field_same_table:
                related_fields = category.review_widget.related_field_same_table.all()
                for field in related_fields:
                    try:
                        gtfs_field_value = row[field.name]
                    except KeyError:
                        gtfs_field_value = "[blank]"
                    my_field = related_field.objects.create(
                        gtfs_field=field,
                        result=this_result,
                        gtfs_field_value=gtfs_field_value)

            if has_related_field_other_table:
                RelatedFieldsSelector = related_fields_selector_factory(
                    category.review_widget)
                field_list = RelatedFieldsSelector.get_related_fields_from_gtfs(
                    row, gtfs_feed)
                for field in field_list:
                    gf, created = gtfs_field.objects.get_or_create(
                        name=field[0],
                        table=field[1],
                        type=get_field_type(field[0], field[1]))
                    my_field = related_field.objects.create(
                        gtfs_field=gf,
                        result=this_result,
                        gtfs_field_value=str(field[2]))

    return new_session_gtfs_path, my_review