Example #1
def test_config_must_be_dag():
    config = ptg.config.default_config()

    assert config.has_edge("routes.txt", "trips.txt")

    # Make a cycle
    config.add_edge("trips.txt", "routes.txt")

    path = fixture("amazon-2017-08-06")
    with pytest.raises(ValueError):  # "Config must be a DAG"
        ptg.load_feed(path, config=config)
Example #2
    def read_feed(feed_path: str = None) -> Tuple[nx.DiGraph, Feed]:
        """
        Read GTFS feed from folder and return a config and Partridge Feed object
        """
        config = geo_config()
        config.nodes["shapes.txt"]["required_columns"] = config.nodes[
            "shapes.txt"]["required_columns"] + ("A", "B", "LINK_ID")

        try:
            feed = ptg.load_feed(feed_path, config=config)
            TransitNetwork.validate_feed(feed, config)

        except KeyError:
            config = default_config()
            config.nodes["shapes.txt"]["required_columns"] = (
                "shape_id",
                "A",
                "B",
                "LINK_ID",
            )

            WranglerLogger.warning(
                "Reducing data requirements for shapes.txt to: {}".format(
                    config.nodes["shapes.txt"]["required_columns"]
                )
            )
            feed = ptg.load_feed(feed_path, config=config)
            TransitNetwork.validate_feed(feed, config)

        ## todo should be read in as a schema
        WranglerLogger.info(
            "Read %s agencies from %s" %
            (feed.agency.size, os.path.join(feed_path, "agency.txt")))
        WranglerLogger.info("Read %s frequencies from %s" %
                            (feed.frequencies.size,
                             os.path.join(feed_path, "frequencies.txt")))
        WranglerLogger.info(
            "Read %s routes from %s" %
            (feed.routes.size, os.path.join(feed_path, "routes.txt")))
        WranglerLogger.info(
            "Read %s shapes from %s" %
            (feed.shapes.size, os.path.join(feed_path, "shapes.txt")))
        WranglerLogger.info(
            "Read %s stops from %s" %
            (feed.stops.size, os.path.join(feed_path, "stops.txt")))
        WranglerLogger.info(
            "Read %s transfers from %s" %
            (feed.transfers.size, os.path.join(feed_path, "transfers.txt")))
        WranglerLogger.info(
            "Read %s trips from %s" %
            (feed.trips.size, os.path.join(feed_path, "trips.txt")))

        return config, feed
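A minimal usage sketch (an assumption, not from the source): if read_feed is exposed as a static method on TransitNetwork, it could be called as below; the feed folder path is illustrative.

# Hypothetical call; the GTFS folder path is illustrative.
config, feed = TransitNetwork.read_feed("examples/stpaul_gtfs")
print(feed.routes.head())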
Example #3
def test_extract_routes(path):
    fd = ptg.load_feed(path)

    agencies = fd.agency
    assert len(agencies) == 3

    routes = fd.routes
    assert len(routes) == 14

    route_ids = [routes.iloc[0].route_id]
    agency_ids = set(fd.routes[fd.routes.route_id.isin(route_ids)].agency_id)
    trip_ids = set(fd.trips[fd.trips.route_id.isin(route_ids)].trip_id)
    stop_ids = set(fd.stop_times[fd.stop_times.trip_id.isin(trip_ids)].stop_id)

    assert len(agency_ids)
    assert len(trip_ids)
    assert len(stop_ids)

    try:
        tmpdir = tempfile.mkdtemp()
        outfile = os.path.join(tmpdir, "test.zip")

        result = ptg.extract_feed(path, outfile,
                                  {"trips.txt": {
                                      "route_id": route_ids
                                  }})
        assert result == outfile

        new_fd = ptg.load_feed(outfile)
        assert list(new_fd.routes.route_id) == route_ids
        assert set(new_fd.agency.agency_id) == agency_ids
        assert set(new_fd.trips.trip_id) == trip_ids
        assert set(new_fd.stop_times.trip_id) == trip_ids
        assert set(new_fd.stops.stop_id) == stop_ids

        nodes = []
        for node in fd._config.nodes():
            df = fd.get(node)
            if not df.empty:
                nodes.append(node)

        assert len(nodes)

        for node in nodes:
            original_df = fd.get(node)
            new_df = new_fd.get(node)
            assert set(original_df.columns) == set(new_df.columns)

    finally:
        shutil.rmtree(tmpdir)
Example #4
def start_new_evaluation(request):
    active_page = 'evaluate'
    if request.session.get('gtfs_feed', None):
        tmp_dir = request.session['gtfs_feed']
        gtfs_feed = ptg.load_feed(tmp_dir)
        agency_options = gtfs_feed.agency['agency_name'].tolist()
        agency_options = list_to_tuple_of_tuples(agency_options)
        mode_options = list(set(gtfs_feed.routes['route_type'].tolist()))
        mode_options = get_mode_drop_down(mode_options)
        my_new_review_form = NewReviewForm(agency_options=agency_options, mode_options=mode_options)
    else:
        my_new_review_form = None
    if request.POST:
        my_new_review_form = NewReviewForm(request.POST, agency_options=agency_options, mode_options=mode_options)
        if my_new_review_form.is_valid():
            agency_name = my_new_review_form.cleaned_data['agency']
            mode = my_new_review_form.cleaned_data['mode']
            new_session_gtfs_path, my_review = DataSelector.setup_initial_data_for_review(request.session['gtfs_feed'],
                                                                                          agency_name,
                                                                                          mode)
            request.session['gtfs_feed'] = new_session_gtfs_path
            return redirect(evaluate_feed, review_id=my_review.id)

    return render(request, 'start_new_evaluation.html', {'active_page': active_page,
                                                         'my_new_review_form': my_new_review_form})
Example #5
def get_representative_feed(file_loc: str,
                            day_type: str = 'busiest') -> ptg.gtfs.Feed:
    """
    Given a filepath, extract a partridge feed object, holding a \
    representative set of schedule patterns, extracted from the GTFS zip \
    file, as a set of pandas DataFrames.

    Parameters
    ----------
    file_loc : str
        The location (filepath) of the GTFS zip file.
    day_type : str
        The name of the type of representative feed desired. Currently, only \
        one type is supported, busiest. This extracts the schedule pattern \
        for a day that has the most service on it. This is determined by the \
        day with the most trips on it.

    Returns
    -------
    feed : ptg.gtfs.Feed
        A partridge feed object, holding related schedule information as \
        pandas DataFrames for the busiest day in the available schedule.
    """

    # Extract service ids and then trip counts by those dates
    try:
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Raised by partridge if no valid dates returned
    except AssertionError:
        # Make sure we have some valid values returned in trips
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # TODO: Due to partridge's assertion error being raised, this
    #       check may no longer be needed.    
    if not len(trip_counts_by_date.items()):
        # Otherwise, error out
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select how
    # to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the service id that has the most trips associated with it
        (selected_date,
         trip_count) = max(trip_counts_by_date.items(), key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    all_service_ids = '\n\t'.join(service_ids_by_date[selected_date])
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    sub = service_ids_by_date[selected_date]
    feed_query = {'trips.txt': {'service_id': sub}}
    return ptg.load_feed(file_loc, view=feed_query)
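A minimal usage sketch, assuming the log helper and InvalidGTFS exception referenced above are defined in the same module; the zip path is illustrative.

# Hypothetical call; selects the schedule pattern for the day with the most trips.
feed = get_representative_feed('data/agency_gtfs.zip', day_type='busiest')
print(len(feed.trips), 'trips on the busiest service day')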
Example #6
    def read(feed_path: str) -> TransitNetwork:
        """
        Read GTFS feed from folder and return a TransitNetwork object.

        Args:
            feed_path: where to read transit network files from

        Returns: a TransitNetwork object.
        """
        config = default_config()
        feed = ptg.load_feed(feed_path, config=config)
        WranglerLogger.info("Read in transit feed from: {}".format(feed_path))

        updated_config = TransitNetwork.validate_feed(feed, config)

        # Read in each feed so we can write over them
        editable_feed = DotDict()
        for node in updated_config.nodes.keys():
            # Load (initiate Partridge's lazy load)
            editable_feed[node.replace(".txt", "")] = feed.get(node)

        transit_network = TransitNetwork(feed=editable_feed,
                                         config=updated_config)
        transit_network.feed_path = feed_path
        return transit_network
Example #7
def DoesFeedLoad(gtfs):
    try:
        feed = ptg.load_feed(gtfs)
        return True
    except Exception as e:
        print(e)
        return False
Example #8
def HasBlockIDs(gtfs_filename):
    gtfs = ptg.load_feed(gtfs_filename)
    if 'block_id' not in gtfs.trips.columns:  # block_id column missing
        return False
    if gtfs.trips['block_id'].isna().any():  # block_id column present but some rows missing data
        return False
    return True
Example #9
def test_extract_agencies(path):
    fd = ptg.load_feed(path)

    agencies = fd.agency
    assert len(agencies) == 3

    routes = fd.routes
    assert len(routes) == 14

    agency_ids = [agencies.iloc[0].agency_id]
    route_ids = set(fd.routes[fd.routes.agency_id.isin(agency_ids)].route_id)
    trip_ids = set(fd.trips[fd.trips.route_id.isin(route_ids)].trip_id)
    stop_ids = set(fd.stop_times[fd.stop_times.trip_id.isin(trip_ids)].stop_id)

    assert len(route_ids)
    assert len(trip_ids)
    assert len(stop_ids)

    with tempfile.TemporaryDirectory() as tmpdir:
        outfile = os.path.join(tmpdir, "test.zip")

        result = ptg.extract_feed(
            path, outfile, {"routes.txt": {"agency_id": agency_ids}}
        )
        assert result == outfile

        new_fd = ptg.load_feed(outfile)
        assert list(new_fd.agency.agency_id) == agency_ids
        assert set(new_fd.routes.route_id) == route_ids
        assert set(new_fd.trips.trip_id) == trip_ids
        assert set(new_fd.stop_times.trip_id) == trip_ids
        assert set(new_fd.stops.stop_id) == stop_ids

        nodes = []
        for node in fd._config.nodes():
            df = fd.get(node)
            if not df.empty:
                nodes.append(node)

        assert len(nodes)

        for node in nodes:
            original_df = fd.get(node)
            new_df = new_fd.get(node)
            assert set(original_df.columns) == set(new_df.columns)
Example #10
def get_gtfs_feed(network, network_date):
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    Assignment.NETWORK_BUILD_DATE = network_date

    service_ids_by_date = ptg.read_service_ids_by_date(network)
    service_ids = service_ids_by_date[network_date]
    feed = ptg.load_feed(network, config=Util.get_fast_trips_config(), view={
        'trips.txt': {'service_id': service_ids},
    })
    return feed
Example #11
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(zip_path)  # , encoding='utf-8')
    service_ids = service_ids_by_date[date]

    feed = ptg.load_feed(
        zip_path,
        view={
            'trips.txt': {
                'service_id': service_ids,
            },
        },
        # encoding='utf-8' # CUSTOM VERSION, NOT YET PUSHED
    )
    return feed
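A hedged usage sketch; the zip path and date are illustrative, and the date must be a datetime.date key present in the service_ids_by_date mapping.

# Hypothetical call; filters trips.txt to the service IDs active on that date.
from datetime import date
feed = get_partridge_feed_by_date('data/gtfs.zip', date(2019, 6, 1))
print(len(feed.trips))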
Example #12
    def read_input_files(self):
        """
        Reads in the input network and demand files and initializes the relevant data structures.
        """
        self.performance.record_step_start(0, 0, 0, "read_input_files")

        # Read the gtfs files first
        FastTripsLogger.info("Reading GTFS schedule")

        service_ids_by_date = ptg.read_service_ids_by_date(
            Assignment.INPUT_NETWORK_ARCHIVE)
        service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]
        gtfs_feed = ptg.load_feed(os.path.join(
            Assignment.INPUT_NETWORK_ARCHIVE),
                                  config=Util.get_fast_trips_config(),
                                  view={
                                      'trips.txt': {
                                          'service_id': service_ids
                                      },
                                  })
        # Read Stops (gtfs-required)
        self.stops = Stop(Assignment.INPUT_NETWORK_ARCHIVE,
                          Assignment.OUTPUT_DIR, gtfs_feed,
                          Assignment.NETWORK_BUILD_DATE)

        # Read routes, agencies, fares
        self.routes = Route(Assignment.INPUT_NETWORK_ARCHIVE,
                            Assignment.OUTPUT_DIR, gtfs_feed,
                            Assignment.NETWORK_BUILD_DATE, self.stops)

        # Read Transfers
        self.transfers = Transfer(Assignment.INPUT_NETWORK_ARCHIVE,
                                  Assignment.OUTPUT_DIR, gtfs_feed)

        # Read trips, vehicles, calendar and stoptimes
        self.trips = Trip(Assignment.INPUT_NETWORK_ARCHIVE,
                          Assignment.OUTPUT_DIR, gtfs_feed,
                          Assignment.NETWORK_BUILD_DATE, self.stops,
                          self.routes, Assignment.PREPEND_ROUTE_ID_TO_TRIP_ID)

        # read the TAZs into a TAZ instance
        self.tazs = TAZ(Assignment.OUTPUT_DIR, gtfs_feed,
                        Assignment.NETWORK_BUILD_DATE, self.stops,
                        self.transfers, self.routes)

        # Read the demand into passenger_id -> passenger instance
        self.passengers = Passenger(Assignment.INPUT_DEMAND_DIR,
                                    Assignment.OUTPUT_DIR,
                                    Assignment.NETWORK_BUILD_DATE, self.stops,
                                    self.routes,
                                    Assignment.CAPACITY_CONSTRAINT)
Example #13
    def read_gtfs(gtfs_feed_dir: str, parameters: dict = {}):
        """
        Reads GTFS files from a directory and returns a StandardTransit
        instance.

        Args:
            gtfs_feed_dir: location of the GTFS files
            parameters (Optional): Dictionary of parameter settings. If not
                provided, default parameters will be used.

        Returns:
            StandardTransit instance
        """
        return StandardTransit(ptg.load_feed(gtfs_feed_dir), parameters=parameters)
Example #14
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(
        zip_path)  # , encoding='utf-8')
    service_ids = service_ids_by_date[date]

    feed = ptg.load_feed(
        zip_path,
        view={
            'trips.txt': {
                'service_id': service_ids,
            },
        },
        # encoding='utf-8' # CUSTOM VERSION, NOT YET PUSHED
    )
    return feed
Example #15
    def get_representative_feed(self, file_loc: str, the_date: str):
        year, month, day = map(int, the_date.split("/"))
        selected_date = date(year, month, day)
        # Extract service ids and then trip counts by those dates
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)
        # Make sure we have some valid values returned in trips
        if not len(trip_counts_by_date.items()):
            # Otherwise, error out
            raise InvalidGTFS('No valid trip counts by date '
                              'were identified in GTFS.')
        sub = service_ids_by_date[selected_date]
        feed_query = {'trips.txt': {'service_id': sub}}
        feeds = ptg.load_feed(file_loc, view=feed_query)
        return feeds
Example #16
    def read(feed_path: str, fast: bool = False) -> TransitNetwork:
        """
        Read GTFS feed from folder and return a TransitNetwork object.
        """
        config = default_config()
        feed = ptg.load_feed(feed_path, config=config)
        WranglerLogger.info("Read in transit feed from: {}".format(feed_path))
        updated_config = TransitNetwork.validate_feed(feed, config)

        # Read in each feed so we can write over them
        new_feed = DotDict()
        for node in updated_config.nodes.keys():
            # Load (initiate Partridge's lazy load)
            new_feed[node.replace(".txt", "")] = feed.get(node)

        transit_network = TransitNetwork(feed=new_feed, config=updated_config)
        transit_network.feed_path = feed_path
        return transit_network
Example #17
    def validate_skip_result(self, request, current_result_id):
        '''This method validates that you may replace a result in the review. It returns True or False and a request
        with an error or success message.'''
        target_result = get_object_or_404(result, id=current_result_id)
        gtfs_feed = ptg.load_feed(request.session['gtfs_feed'])
        if not self.__gtfs_feed_matches_result(gtfs_feed, target_result):
            messages.error(
                request,
                'Your active GTFS feed does not appear to match the review you are working on. You may no longer skip an item'
            )
            return False, request
        if not self.__check_all_gtfs_rows_are_not_selected(
                gtfs_feed, target_result):
            messages.warning(
                request,
                "You can not skip any items in this category, all items in the feed have been selected for review."
            )
            return False, request
        return True, request
Example #18
def post_gtfs_zip(request):
    """This view is a post request for saving GTFS zip files to a temp folder for use latter"""
    if not request.method == 'POST' or not request.FILES:
        return HttpResponse('You must submit a .zip file', status=400)
    else:
        form = GtfsZipForm(request.POST, request.FILES)
        if form.is_valid():

            try:
                # TODO implement better file management
                tmp_dir = tempfile.mkdtemp()
                zip_ref = zipfile.ZipFile(request.FILES['file'], 'r')
                zip_ref.extractall(tmp_dir)
                gtfs_feed = ptg.load_feed(tmp_dir)
                request.session['gtfs_feed'] = tmp_dir
                messages.success(request, "Your GTFS file has been successfully uploaded and parsed!")
            except Exception:
                messages.error(request,
                               'There was an error uploading your GTFS feed.  Please be sure you submitted a valid .zip GTFS file and try again.')
        else:
            messages.error(request,
                           'There was an error uploading your GTFS feed.  Please be sure you submitted a valid .zip GTFS file and try again.')

        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
Example #19
def HasBusRoutes(gtfs_filename):
    #Check to see if the feed contains any buses
    feed = ptg.load_feed(gtfs_filename)
    return feed.routes.route_type.isin(route_types).any()
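Note that route_types is assumed to be defined at module scope; a plausible definition for standard bus routes would be:

# Assumed module-level constant; GTFS route_type 3 = bus. Depending on how the
# feed is parsed, route_type may be numeric or string, so both are covered.
route_types = [3, '3']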
Example #20
                                         v,
                                         length=0,
                                         duration=0,
                                         mode='hopoff')

                my_routes_g.add_edge(u_name,
                                     v_name,
                                     length=d['length'],
                                     duration=d['duration'],
                                     mode='bus')
                my_routes_g.remove_edge(u, v)

        gtfs_routes_g = copy.deepcopy(road_g)

        feed_query = {'routes.txt': {'route_id': route_id}}
        feed = ptg.load_feed(gtfs_db_dir + gtfs, view=feed_query)

        for i, trip in feed.trips.iterrows():
            trip_id = trip['trip_id']
            trip_stops = feed.stop_times.loc[feed.stop_times['trip_id'] ==
                                             trip_id]
            trip_stops = trip_stops.sort_values('stop_sequence')
            trip_stops = trip_stops.merge(feed.stops, how='left', on='stop_id')
            trip_stops_coords = list(
                zip(trip_stops.stop_lon, trip_stops.stop_lat))
            if len(trip_stops_coords) > 1:
                # Add edges between stops
                for stop1, stop2 in zip(trip_stops_coords[:-1],
                                        trip_stops_coords[1:]):
                    stop1_node = ox.utils.get_nearest_node(
                        road_g, (stop1[1], stop1[0]))
Example #21
    def select_new_item_for_review(self, gtfs_feed, current_result_id):
        '''This method will replace the specified current result with a new one from the gtfs_feed'''

        target_result = get_object_or_404(result, id=current_result_id)
        gtfs_feed = ptg.load_feed(gtfs_feed)
Example #22
import partridge as ptg


inpath = 'inputs/SF_gtfs.zip'
# future: get the GTFS files from S3 (by default do it for all agencies)

# open them in partridge
_date, service_ids = ptg.read_busiest_date(inpath)
view = {
    'trips.txt': {'service_id': service_ids},
}
feed = ptg.load_feed(inpath, view)
# We define a VisualTrip as a trip, but with the unique ID being trip_headsign or
# route_short_name || route_long_name + ' ' + trip_headsign if it does not contain the route name in it (varies between agencies).
# Each VisualTrip has a list of trip_ids,
# but one shape, which is the result of merging the shapes from each trip,
# and one list of stop_times, which are joined together.
# This is done to address cases like Muni's 38 Geary, which goes to Lands End or V.A Hospital, but both go to SF Transit Center inbound
# or like the TTC's 25 Don Mills, where the 25B and 25C branches have no overlap (it's a split of the route).
routes = {
    route.route_id: route for route in feed.routes.itertuples()
}
visual_trips = {}
# aggregate VisualTrips
for trip in feed.trips.itertuples():
    route = routes[trip.route_id]
    route_short_name = route.route_short_name or route.route_long_name
    trip_headsign = trip.trip_headsign
    visual_trip_key = trip_headsign if route_short_name in trip_headsign else route_short_name + ' ' + trip_headsign
    visual_trip = visual_trips.get(visual_trip_key, {
        'trip_ids': [],
        'shape': {},
Example #23
def test_load_feed():
    feed = ptg.load_feed(fixture("amazon-2017-08-06"))
    assert feed.stop_times.dtypes["stop_id"] == np.object
    assert feed.stop_times.dtypes["stop_sequence"] == np.int64
    assert feed.stop_times.dtypes["arrival_time"] == np.float64
Example #24
from pathlib import Path
import os
import pandas as pd
import peartree as pt
import gtfstk
import partridge as ptg

real_gtfs_dir = Path('../../data/gtfs/cleaned_undefined_zombies')
gen_gtfs_dir = Path('../../output/gtfs/')

imgs_info = pd.read_csv('../../data/route_imgs_256/imgs_info.csv')

#----------LOOP-----------#
# Loop for each generated gtfs since the generated are a subset of the cleaned
gen_filepath = gen_gtfs_dir / '1.zip'
# gen_filename is the filename without the extension
gen_filename = gen_filepath.stem
# Get the row with the info about the original gtfs the gen came from
img_info = imgs_info.loc[imgs_info['img'] == gen_filename + '.jpg']
real_filepath = real_gtfs_dir / img_info['gtfs'].values[0]
route_id = img_info['route_id'].values[0]

feed_query = {'routes.txt': {'route_id': route_id}}
gen_feed = ptg.load_feed(str(real_filepath), view=feed_query)

start = 7 * 60 * 60
end = 9 * 60 * 60
G = pt.load_feed_as_graph(gen_feed, start, end)

print('deb')
Example #25
def test_missing_dir():
    with pytest.raises(ValueError):  # "File or path not found"
        ptg.load_feed(fixture("missing"))
Example #26
def generate_shapes(gtfs_inpath):

    turn_penalty_factor = 100000  # Penalizes turns in Valhalla routes. Range 0 - 100,000.
    stop_radius = 35  # Radius used to search when matching stop coordinates (meters)
    intermediate_radius = 100  # Radius used to search when matching intermediate coordinates (meters)

    stop_distance_threshold = 1000  # Stop-to-stop distance threshold for including intermediate coordinates (meters)
    maneuver_penalty = 43200  # Penalty when a route includes a change from one road to another (seconds). Range 0 - 43,200.

    # Initialize Valhalla input dictionary with some empty values
    point_parameters = {
        'lon': None,
        'lat': None,
        'type': None,
        'radius': None,
        'rank_candidates': 'true',
        'preferred_side': 'same',
        'node_snap_tolerance': 0,
        'street_side_tolerance': 0
    }

    request_parameters = {
        'shape': None,
        'costing': 'bus',
        'shape_match': 'map_snap',
        'filters': {
            'attributes': ['edge.id', 'edge.length', 'shape'],
            'action': 'include'
        },
        'costing_options': {
            'bus': {
                'maneuver_penalty': maneuver_penalty
            }
        },
        'trace_options.turn_penalty_factor': turn_penalty_factor
    }
    """ -------------Objects------------- """
    class Pattern:  # Attributes for each unique pattern of stops that create one or more route variant
        def __init__(self, route, direction, stops, trips, stop_coords, shape,
                     timepoints):
            self.route = route
            self.direction = direction
            self.stops = stops
            self.shape = shape
            self.trips = trips
            self.timepoints = timepoints
            self.stop_coords = stop_coords
            self.shape_coords = 0
            self.v_input = 0
            self.coord_types = 0
            self.radii = 0

    class Segment:  # Attributes for each segment which make up a pattern
        def __init__(self, geometry, distance):
            self.geometry = geometry
            self.distance = distance

    class Corridor:  # Attributes for each corridor
        def __init__(self, edges, segments):
            self.edges = edges
            self.segments = segments
            self.passenger_shared = []
            self.stop_shared = []

        def get_edges(self):
            return self.edges

        def get_segments(self):
            return self.segments

        def get_pass_shared(self):
            return self.passenger_shared

        def get_stop_shared(self):
            return self.stop_shared

    """ -------------Functions------------- """

    # Function to get distance (in m) from a pair of lat, long coord tuples
    def get_distance(start, end):
        R = 6372800  # earth radius in m
        lat1, lon1 = start
        lat2, lon2 = end

        phi1, phi2 = math.radians(lat1), math.radians(lat2)
        dphi = math.radians(lat2 - lat1)
        dlambda = math.radians(lon2 - lon1)
        a = math.sin(dphi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(
            dlambda / 2)**2
        return round(2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a)), 0)

    """ 
    Takes a set of route coordinates and bus stop coordinates, then finds the 
    route coordinate pair that is closest to each bus stop. Returns an array of 
    strings of the same length as the trip coordinate input, with 'break_through' 
    for coordinates at bus stops and 'through' for other coordinates.
    """

    def locate_stops_in_shapes(shape_coords, stop_coords, stop_radius,
                               intermediate_radius):
        coordinate_types = [1] * len(stop_coords)
        radii = [stop_radius] * len(stop_coords)
        stop_indices = [0] * len(stop_coords)
        shape_coord_list = shape_coords.values.tolist()

        last_stop = 0
        #count = 1
        coordinate_list = []
        shape_line = LineString([
            Point(x, y) for x, y in zip(shape_coords.shape_pt_lon,
                                        shape_coords.shape_pt_lat)
        ])

        # Get index of point closest to each bus stop
        for stop_number, stop in enumerate(stop_coords):

            stop_point = Point(stop[1], stop[0])
            new_stop = nearest_points(
                shape_line,
                stop_point)[0]  # index 0 is nearest point on the line
            coordinate_list.append((new_stop.y, new_stop.x))

            benchmark = 10**9
            index = 0
            best_index = 0
            for point in shape_coord_list[
                    last_stop:]:  # Ensure stops occur sequentially
                test_dist = get_distance(point, stop)
                if test_dist + 2 < benchmark:  # Add 2m to ensure that loop routes don't match the later stop
                    benchmark = test_dist
                    best_index = index + last_stop
                index += 1
            stop_indices[stop_number] = best_index
            last_stop = best_index + 1
            #print("Stop #", count, "; Best Index:", best_index)
            #count += 1

        added_stop_count = 0

        # Add intermediate coordinates if stops are far apart
        for stop_number in range(len(stop_coords) - 1):
            current_stop = stop_coords[stop_number]
            next_stop = stop_coords[stop_number + 1]
            current_pos = stop_indices[stop_number]
            next_pos = stop_indices[stop_number + 1]

            distance = get_distance(current_stop, next_stop)

            if distance > stop_distance_threshold:

                coords_to_add = math.floor(distance / stop_distance_threshold)
                num_available_coords = next_pos - current_pos
                interval = int(num_available_coords / (coords_to_add + 1))

                # If there aren't enough available coords to fill the shape, just add all coords
                if coords_to_add > num_available_coords:

                    for new_coord in range(num_available_coords):

                        coordinate_list.insert(
                            stop_number + 1 + added_stop_count,
                            shape_coord_list[current_pos + new_coord])
                        coordinate_types.insert(
                            stop_number + 1 + added_stop_count, 0)
                        radii.insert(stop_number + 1 + added_stop_count,
                                     intermediate_radius)
                        added_stop_count += 1

                else:
                    for new_coord in range(coords_to_add):

                        coordinate_list.insert(
                            stop_number + 1 + added_stop_count,
                            shape_coord_list[current_pos +
                                             (interval * new_coord)])
                        coordinate_types.insert(
                            stop_number + 1 + added_stop_count, 0)
                        radii.insert(stop_number + 1 + added_stop_count,
                                     intermediate_radius)
                        added_stop_count += 1

        return coordinate_types, coordinate_list, radii

    # Create index for each pattern
    def get_pattern_index(patterns):
        patterns = patterns.sort_values(
            by=['route_id', 'direction_id', 'count'],
            ascending=[True, True, False])
        prev_dir = 0
        prev_route = 0
        index = []
        for pattern in patterns.values.tolist():
            route = pattern[2]
            direction = pattern[4]
            if route != prev_route or direction != prev_dir:
                pattern_count = 1
            else:
                pattern_count += 1
            index.append(
                str(route) + '-' + str(direction) + '-' + str(pattern_count))
            prev_dir = direction
            prev_route = route
        patterns['pattern_index'] = index
        return patterns

    def get_skipped_segments(coords, request_data):
        # If request times out, try twice more and then raise an error
        to_count = 1
        while to_count < 4:
            try:
                # Use Valhalla map matching engine to snap shapes to the road network
                request_data['shape'] = coords
                req = requests.post('http://localhost:8002/trace_attributes',
                                    data=json.dumps(request_data),
                                    timeout=100)
                to_count = 10
            except:
                print("Timeout #", to_count)
                to_count += 1
            if to_count == 4:
                raise Exception('Request timed out 3x')

        # Extract Valhalla response
        return req.json()

    def store_geometry_and_distance(result, leg):
        geometry = result['trip']['legs'][leg]['shape']
        distance = result['trip']['legs'][leg]['summary']['length']
        return Segment(geometry, distance)

    def match_segs_to_edges(pair_list, pair_dict, request_parameters):
        cm_count = 0
        start_time = time.time()

        for pair in pair_list:
            geometry = pair[1]
            pair_index = pair[0]

            if pair_index in pair_dict:  # If edges already identified, skip
                cm_count += 1
                continue
            else:

                # If request times out, try twice more and then raise an error
                to_count = 1
                while to_count < 4:
                    try:
                        # Use Valhalla map matching engine to snap shapes to the road network
                        request_data = request_parameters.copy()
                        request_data['shape'] = geometry
                        req = requests.post(
                            'http://localhost:8002/trace_attributes',
                            data=json.dumps(request_data),
                            timeout=100)
                        to_count = 10
                    except:
                        print("Timeout #", to_count)
                        to_count += 1
                    if to_count == 4:
                        raise Exception('Request ', cm_count, ' timed out 3x')

                # Extract Valhalla response and store as pair object attribute
                result = req.json()
                edges = []
                for edge in result['edges']:
                    edges.append(edge['id'])
                pair_dict[pair_index] = edges

            cm_count += 1
            if cm_count % 100 == 0:
                elapsed_time = time.time() - start_time
                print(cm_count, "of", len(pair_list), "edge ids identified.",
                      "Elapsed time:", round(elapsed_time, 0))
                start_time = time.time()

        return pair_dict

    """ -------------Main Program------------- """

    # Import GTFS feed and filter down to normal bus routes only
    route_type = ['3']
    route_desc = ['Key Bus', 'Commuter Bus', 'Local Bus']
    view = {'routes.txt': {'route_type': route_type, 'route_desc': route_desc}}
    feed = ptg.load_feed(gtfs_inpath, view)

    # Check if shapes.txt exists in GTFS feed
    try:
        feed_shapes = feed.shapes[['shape_id', 'shape_pt_lat', 'shape_pt_lon']]
        has_shapes = True
    except:
        has_shapes = False

    # Check if timepoints included in GTFS feed

    try:
        feed_stop_events = feed.stop_times[[
            'trip_id', 'stop_id', 'stop_sequence', 'checkpoint_id'
        ]]
        has_timepoints = True
    except:
        feed_stop_events = feed.stop_times[[
            'trip_id', 'stop_id', 'stop_sequence'
        ]]
        has_timepoints = False

    # Get relevant tables from GTFS feed: trips, routes and stop sequences
    feed_trips = feed.trips[['route_id', 'trip_id', 'direction_id']]
    all_stops = pd.merge(feed_trips,
                         feed_stop_events,
                         on='trip_id',
                         how='inner')
    all_stops = all_stops.sort_values(by=['trip_id', 'stop_sequence'])
    stops_dict = all_stops.groupby('trip_id')['stop_id'].agg(list).to_dict()

    # Get timepoints and change timepoints from binary to increasing count
    if has_timepoints == True:
        tp_dict = all_stops.groupby('trip_id')['checkpoint_id'].agg(
            list).to_dict()
        for trip in tp_dict:
            tp_list = tp_dict[trip]
            new_list = []
            tp_count = 0
            for stop in tp_list:
                if type(stop) == str:
                    tp_count += 1
                new_list.append(tp_count)
            tp_dict[trip] = new_list

    else:
        # Enter zeros
        tp_dict = {}
        for trip in stops_dict:
            tp_dict[trip] = [0] * len(stops_dict[trip])

    # Get coordinates for each stop from gtfs
    feed_stops = feed.stops[['stop_id', 'stop_lat', 'stop_lon']].copy()
    stop_coordinates = list(zip(feed_stops.stop_lat, feed_stops.stop_lon))
    feed_stops['coords'] = stop_coordinates.copy()
    feed_stops = feed_stops[['stop_id', 'coords']]
    stop_df = pd.merge(all_stops, feed_stops, on='stop_id', how='inner')
    stop_df = stop_df.sort_values(by=['trip_id', 'stop_sequence'])
    coords_dict = stop_df.groupby('trip_id')['coords'].agg(list).to_dict()

    # Find the unique sequences of stops (patterns)
    hash_list = list(stops_dict.values())
    hashes = []
    for sequence in hash_list:  # hashing function for the coordinates so that they can be compared
        new_hash = 0
        count = 1
        for stop in sequence:
            try:
                num = int(stop)
            except:
                num = sum([ord(x) for x in stop])
            new_hash += (2 * count)**2 + num**3  # Arbitrary hashing function
            count += 1
        hashes.append(new_hash)
    all_trips = feed_trips.sort_values(by='trip_id')
    all_trips['hash'] = hashes

    # Count how many times each route-hash combination appears
    pattern_counts = all_trips.groupby(['route_id', 'hash', 'direction_id'
                                        ]).size().reset_index(name='count')

    # Get the trip_ids associated with each route-hash combination as a list of lists
    trip_dict = all_trips.groupby(['route_id',
                                   'hash'])['trip_id'].agg(list).to_dict()

    # Create a dataframe for the patterns with route_ids, direction, count and representative trip id
    all_trips = all_trips.drop_duplicates(
        subset=['route_id', 'hash', 'direction_id'])
    pattern_counts = pd.merge(pattern_counts[['count', 'hash', 'route_id']],
                              all_trips,
                              on=['hash', 'route_id'],
                              how='inner')
    pattern_counts = get_pattern_index(pattern_counts)

    # Create dict of Pattern objects
    pattern_list = pattern_counts['pattern_index'].values.tolist()
    pattern_dict = {}
    shape_dict = {}

    if has_shapes == True:
        trip_shapes = feed.trips[['trip_id', 'shape_id']]
        trip_shapes = trip_shapes[trip_shapes['trip_id'].isin(
            pattern_counts['trip_id'])]
        shape_dict = dict(zip(trip_shapes['trip_id'], trip_shapes['shape_id']))

    for pattern in pattern_list:
        pattern_data = pattern_counts.loc[pattern_counts['pattern_index'] ==
                                          pattern].values.tolist()[0]
        index = pattern
        route = pattern_data[2]
        direction = pattern_data[4]
        trip_id = pattern_data[3]
        pattern_hash = pattern_data[1]
        stops = stops_dict[trip_id]
        coords = coords_dict[trip_id]
        trips = trip_dict[(route, pattern_hash)]
        timepoints = tp_dict[trip_id]

        if len(shape_dict) > 0:
            shape = shape_dict[str(trip_id)]
        else:
            shape = 0

        pattern_dict[index] = Pattern(route, direction, stops, trips, coords,
                                      shape, timepoints)

    # If there are no shapes in GTFS, default to the stop coordinates
    if has_shapes == False:
        for pattern in pattern_list:
            stop_coords = pattern_dict[pattern].stop_coords
            coord_json = []
            for stop in stop_coords:
                input_data = point_parameters.copy()
                input_data['lon'] = stop[1]
                input_data['lat'] = stop[0]
                input_data['type'] = 'break_through'
                input_data['radius'] = stop_radius
                coord_json.append(input_data)

            pattern_dict[pattern].v_input = coord_json
            pattern_dict[pattern].coord_types = [1] * len(stop_coords)

    # Otherwise, include some coordinate points between each pair of stops if stops are far apart
    else:
        feed_shapes = feed.shapes[['shape_id', 'shape_pt_lat', 'shape_pt_lon']]
        count = 0
        for pattern in pattern_list:
            shape = pattern_dict[pattern].shape
            stop_coords = pattern_dict[pattern].stop_coords
            shape_coords = feed_shapes.loc[feed_shapes['shape_id'] == shape][[
                'shape_pt_lat', 'shape_pt_lon'
            ]]

            coordinate_type, coordinate_list, radii = locate_stops_in_shapes(
                shape_coords, stop_coords, stop_radius, intermediate_radius)
            pattern_dict[pattern].coord_types = coordinate_type
            pattern_dict[pattern].radii = radii

            # Unrelated, but we'll need this dictionary later
            pattern_dict[pattern].shape_coords = coordinate_list
            count += 1
            if count % 100 == 0:
                print('Coordinates prepared for', count, 'of',
                      len(pattern_list), 'patterns')

        # Check that the number of 'break's is equal to number of stops in the pattern
        for pattern in pattern_list:

            coord_types = pattern_dict[pattern].coord_types
            radii = pattern_dict[pattern].radii
            num_stops = len(pattern_dict[pattern].stops)
            num_breaks = coord_types.count(1)
            if num_breaks - num_stops != 0:
                print("Error: Breaks - Stops =", num_breaks - num_stops,
                      "for Pattern", pattern)

            coords = pattern_dict[pattern].shape_coords
            coord_list = []
            point_count = 0
            for point in coords:

                if coord_types[point_count]:
                    point_type = 'break_through'
                else:
                    point_type = 'through'

                input_data = point_parameters.copy()
                input_data['lon'] = point[1]
                input_data['lat'] = point[0]
                input_data['type'] = point_type
                input_data['radius'] = radii[point_count]
                coord_list.append(input_data)
                point_count += 1

            pattern_dict[pattern].v_input = coord_list

    # Use map matching to convert the GTFS polylines to matched, encoded polylines
    mm_count = 0
    segment_dict = {}
    skipped_segs = {}
    start_time = time.time()

    for pattern in pattern_list:
        coords = pattern_dict[pattern].v_input
        coordinate_types = pattern_dict[pattern].coord_types
        pattern_segs = len(pattern_dict[pattern].stops) - 1
        pattern_legs = 0
        start_point = 0

        # Send multiple requests to Valhalla if the response is cut off
        while pattern_legs < pattern_segs:
            # If request times out, try twice more and then raise an error
            to_count = 1
            while to_count < 6:
                try:
                    # Use Valhalla map matching engine to snap shapes to the road network
                    request_data = request_parameters.copy()
                    request_data['shape'] = coords[start_point:]
                    req = requests.post('http://localhost:8002/trace_route',
                                        data=json.dumps(request_data),
                                        timeout=60)
                    to_count = 10
                except:
                    print("Timeout #", to_count)
                    to_count += 1

                if to_count == 6:
                    # Add all segments to skipped_segments
                    coords = pattern_dict[pattern].v_input
                    input_points = [
                        i - start_point for i, x in enumerate(coordinate_types)
                        if (x == 1 and i >= start_point)
                    ]
                    for point_idx, point in enumerate(input_points[:-1]):
                        skipped_segs[(
                            pattern,
                            point_idx)] = coords[point:input_points[point_idx +
                                                                    1]]
                    break

            if to_count == 6:
                mm_count += 1
                break

            # Extract encoded polyline from Valhalla response
            result = req.json()
            try:
                result_legs = len(result['trip']['legs'])
            except:
                # No legs returned - widen each point's matching radius and retry
                for coord in coords:
                    radius = int(coord['radius'])
                    coord['radius'] = str(radius + 10)
                    if radius > 500:
                        raise Exception('No path found')
                continue

            # Check that the result 'matched points' match the input break points
            matched_points = [
                location['original_index']
                for location in result['trip']['locations']
            ]
            input_points = [
                i - start_point for i, x in enumerate(coordinate_types)
                if (x == 1 and i >= start_point)
            ]

            # If no points were matched, skip to the next one
            if len(matched_points) == 0:
                last_point = input_points[0] + start_point
                start_point += input_points[1]
                skipped_segs[(pattern,
                              pattern_legs)] = coords[last_point:start_point +
                                                      1]
                pattern_legs += 1
                continue

            internal_missed = []

            # If they are not identical, there are 2 possible cases:
            # 1) Break points were skipped and 2) Response stopped short
            if matched_points != input_points:

                # Get missing points
                missing = np.setdiff1d(input_points, matched_points)

                # If the first coord is missing, skip first segment (2)
                if np.any(missing == 0):
                    last_point = input_points[0] + start_point
                    start_point += input_points[1]
                    skipped_segs[(
                        pattern,
                        pattern_legs)] = coords[last_point:start_point + 1]
                    pattern_legs += 1
                    continue

                # If some inputs were skipped over (1)
                if min(missing) < max(matched_points):

                    # Get skipped inputs
                    internal_missed = [
                        i for i in missing if i < max(matched_points)
                    ]
                    previous_match = 0
                    skip_count = 0

                    for missed_point in internal_missed:
                        input_index = input_points.index(missed_point)
                        previous_input = input_points[input_index - 1]
                        next_input = input_points[input_index + 1]

                        # Add segments on both sides of skipped stop to skipped list
                        skipped_segs[(
                            pattern, pattern_legs + input_index -
                            1)] = coords[previous_input:missed_point + 1]
                        skipped_segs[(
                            pattern, pattern_legs +
                            input_index)] = coords[missed_point:next_input + 1]

                        # Find leg before skipped point
                        last_good_match = max([
                            matched_points.index(i)
                            for i in input_points[:input_index]
                            if i in matched_points
                        ])
                        next_good_match = min([
                            matched_points.index(i)
                            for i in input_points[input_index:]
                            if i in matched_points
                        ])

                        # Store geometry, distance for segments preceding skipped point
                        for leg in range(previous_match, last_good_match):
                            segment_dict[(
                                pattern, pattern_legs + leg +
                                skip_count)] = store_geometry_and_distance(
                                    result, leg)

                        skip_count += 1
                        previous_match = next_good_match

                    rem_count = 0
                    # Store geometry, distance for segments after last skipped point
                    for leg in range(next_good_match, result_legs):
                        segment_dict[(
                            pattern, input_index + pattern_legs + 1 +
                            rem_count)] = store_geometry_and_distance(
                                result, leg)
                        rem_count += 1

                    # Start next matching at latest matched point
                    start_point += max(
                        [i for i in input_points if i in matched_points])

                # If all missing inputs are after last matched point (2)
                elif len(missing) > 0 and min(missing) > max(matched_points):

                    # Next request should start from first missing point
                    prev_stop = input_points[input_points.index(min(missing)) -
                                             1] + start_point
                    start_point += min(missing)

                    # Determine whether cutoff happened at a stop or in between
                    if max(matched_points) not in input_points:
                        del_last_seg = 1
                    else:
                        del_last_seg = 0

                    # Add segment between last matched point and missing point to skip list
                    skipped_segs[(
                        pattern, pattern_legs + result_legs -
                        del_last_seg)] = coords[prev_stop:start_point + 1]

                    # Store geometry, distance for segments preceding skipped point
                    for leg in range(result_legs - del_last_seg):
                        segment_dict[(pattern, pattern_legs +
                                      leg)] = store_geometry_and_distance(
                                          result, leg)

                    # If we keep last segment, we need to add 1 to pattern legs
                    pattern_legs += (1 - del_last_seg)

            # Store distance and geometry
            else:
                for leg in range(result_legs):
                    segment_dict[(pattern, pattern_legs +
                                  leg)] = store_geometry_and_distance(
                                      result, leg)

            pattern_legs += result_legs + len(internal_missed)

        mm_count += 1
        if mm_count % 100 == 0:
            elapsed_time = time.time() - start_time
            print(mm_count, "of", len(pattern_list),
                  "patterns snapped to road network.", "Elapsed time:",
                  round(elapsed_time, 0))
            start_time = time.time()

    # Run a check that all segments are either in the matched segments or skipped segments
    for pattern in pattern_list:
        pattern_segs = len(pattern_dict[pattern].stops) - 1
        for segment in range(pattern_segs):
            if (pattern, segment) not in segment_dict and (
                    pattern, segment) not in skipped_segs:
                print("Error: Pattern " + pattern + ", Seg " + str(segment) +
                      " not assigned.")

    # Run a check that the number of segments in each pattern is less than (#stops - 1)
    for key in segment_dict:
        pattern = key[0]
        segment = key[1]
        if segment > len(pattern_dict[pattern].stops) - 1:
            print("Error: Too many segments assigned to pattern " + pattern)

    # Run the skipped shapes through trace_attributes to get shapes and distance
    pair_dict = {}
    pair_geom = {}
    for seg in skipped_segs:
        pattern = seg[0]
        sequence = seg[1]
        pair = tuple(pattern_dict[pattern].stops[sequence:sequence + 2])

        # If this pair has already been matched as part of another pattern
        if pair in pair_geom:
            segment_dict[seg] = Segment(pair_geom[pair][0], pair_geom[pair][1])
            continue

        coords = skipped_segs[seg].copy()
        result = get_skipped_segments(coords, request_parameters)
        no_match = False
        while len(result) == 4:
            for coord in coords:
                point_radius = coord['radius']
                coord['radius'] = point_radius + 10
            result = get_skipped_segments(coords, request_parameters)
            if point_radius > 150:
                no_match = True
                break

        if no_match:
            continue

        seg_length = 0
        edge_ids = []
        for edge in result['edges']:
            seg_length += edge['length']
            edge_ids.append(edge['id'])
        segment_dict[seg] = Segment(result['shape'], seg_length)

        # Store edge ids to avoid any duplicate requests
        pair_geom[pair] = [result['shape'], seg_length]
        pair_dict[pair] = edge_ids

    # Construct a dataframe sorted by pattern, sequence with encoded polylines
    route_dict = {}
    used_route_pairs = set()
    df_route = []
    df_pair = []
    df_dir = []
    df_pattern = []
    df_dist = []
    df_index = []
    df_tp = []
    df_encodedline = []
    for pattern in pattern_list:
        route = pattern_dict[pattern].route
        direction = pattern_dict[pattern].direction
        stops = pattern_dict[pattern].stops
        timepoints = pattern_dict[pattern].timepoints
        for stop in range(len(stops) - 1):
            pair = (stops[stop], stops[stop + 1])
            tp = timepoints[stop]
            if (pair + (route, )) not in used_route_pairs:
                df_route.append(route)
                df_pair.append(pair)
                df_dir.append(direction)
                df_pattern.append(pattern)
                df_encodedline.append(segment_dict[(pattern, stop)].geometry)
                df_dist.append(segment_dict[(pattern, stop)].distance)
                df_index.append(
                    str(route) + '-' + str(pair[0]) + '-' + str(pair[1]))
                df_tp.append(str(route) + '-' + str(tp))
                used_route_pairs.add((pair + (route, )))

                if pair in route_dict:

                    route_dict[pair].append(route)
                else:
                    route_dict[pair] = list([route])

    return pd.DataFrame(list(
        zip(df_route, df_pair, df_dir, df_pattern, df_dist, df_index,
            df_encodedline)),
                        columns=[
                            'route_id', 'stop_pair', 'direction', 'pattern',
                            'distance', 'seg_index', 'geometry'
                        ])
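A minimal invocation sketch, assuming a Valhalla server is reachable at localhost:8002 as the code above expects; the GTFS zip path is illustrative.

# Hypothetical call; returns a DataFrame of map-matched stop-pair segments.
segments_df = generate_shapes('data/mbta_gtfs.zip')
print(segments_df[['route_id', 'stop_pair', 'distance']].head())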
Example #27
    def setup_initial_data_for_review(gtfs_feed_zip_file, agency, mode):
        '''This method will select the initial set of data that will be reviewed from the provided GTFS zip file'''
        my_review = review.objects.create(agency=agency, mode=mode)
        view = {
            'agency.txt': {
                'agency_name': agency
            },
            'routes.txt': {
                'route_type': mode
            },
        }
        new_tmp_dir = tempfile.mkdtemp()
        outpath = new_tmp_dir
        ptg.extract_feed(gtfs_feed_zip_file, os.path.join(outpath, "view.zip"), view)
        gtfs_feed = ptg.load_feed(gtfs_feed_zip_file)

        new_session_gtfs_path = os.path.join(outpath, "view.zip")

        for category in review_category.objects.all():
            target_field_name = category.gtfs_field.name
            target_table = category.gtfs_field.table
            has_related_field_same_table = category.review_widget.has_related_field_same_table
            has_related_field_other_table = category.review_widget.has_related_field_other_table
            ptg_target_table = getattr(gtfs_feed,
                                       target_table.replace('.txt', ''))
            total_table_rows = ptg_target_table.shape[0]
            ds = data_selector_factory(category.data_selector)

            number_to_sample = ds.select_row_sample_count(total_table_rows)
            random_sample = ptg_target_table.sample(n=number_to_sample)

            reviewed_data_pk_name = get_table_primary_key(target_table)

            for index, row in random_sample.iterrows():
                try:
                    reviewed_data = row[target_field_name]
                except KeyError:
                    reviewed_data = "[blank]"
                try:
                    reviewed_data_pk_value = row[reviewed_data_pk_name]
                except KeyError:
                    reviewed_data_pk_value = None

                this_result = result.objects.create(
                    review=my_review,
                    review_category=category,
                    reviewed_data=reviewed_data,
                    reviewed_data_pk_name=reviewed_data_pk_name,
                    reviewed_data_pk_value=reviewed_data_pk_value)
                if has_related_field_same_table:
                    related_fields = category.review_widget.related_field_same_table.all(
                    )
                    for field in related_fields:
                        try:
                            gtfs_field_value = row[field.name]
                        except KeyError:
                            gtfs_field_value = "[blank]"
                        my_field = related_field.objects.create(
                            gtfs_field=field,
                            result=this_result,
                            gtfs_field_value=gtfs_field_value)

                if has_related_field_other_table:
                    RelatedFieldsSelector = related_fields_selector_factory(
                        category.review_widget)
                    field_list = RelatedFieldsSelector.get_related_fields_from_gtfs(
                        row, gtfs_feed)
                    for field in field_list:
                        gf, created = gtfs_field.objects.get_or_create(
                            name=field[0],
                            table=field[1],
                            type=get_field_type(field[0], field[1]))
                        my_field = related_field.objects.create(
                            gtfs_field=gf,
                            result=this_result,
                            gtfs_field_value=str(field[2]))

        return new_session_gtfs_path, my_review