Example #1
def test_feed_edge_types():
    path = fixture('samtrans-2017-11-28.zip')
    feed = get_representative_feed(path)

    start = 7 * 60 * 60
    end = 10 * 60 * 60
    G1 = load_feed_as_graph(feed, start, end)

    # In the base case, all should be transit
    for _, _, e in G1.edges(data=True):
        assert e['mode'] == 'transit'

    # Now perform a second check where we impute walk edges
    G2 = load_feed_as_graph(feed, start, end, impute_walk_transfers=True)

    # Count the number of edge types by mode, which should now
    # include walk edges as well
    transit_count = 0
    walk_count = 0
    for _, _, e in G2.edges(data=True):
        if e['mode'] == 'transit':
            transit_count += 1
        if e['mode'] == 'walk':
            walk_count += 1

    # And make sure the correct counts were produced
    assert transit_count == 1940
    assert walk_count == 864
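
These tests all resolve their GTFS zips through a fixture() helper that is not shown in the examples. A minimal sketch of what such a helper might look like, assuming the zips sit in a fixtures/ directory alongside the test module (the directory name is an assumption):

import os

def fixture(filename):
    # Hypothetical helper: build an absolute path to a test asset;
    # the 'fixtures' directory name is assumed, not confirmed
    return os.path.join(os.path.dirname(__file__), 'fixtures', filename)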
Example #2
def test_generate_summary_graph_elements():
    path_1 = fixture('samtrans-2017-11-28.zip')
    feed_1 = get_representative_feed(path_1)

    start = 7 * 60 * 60
    end = 10 * 60 * 60
    interpolate_times = True

    # Make sure everything works the same with both multiprocessing on/off
    for use_multiprocessing in [True, False]:
        (summary_edge_costs,
         wait_times_by_stop) = generate_summary_graph_elements(
             feed_1, start, end, FALLBACK_STOP_COST_DEFAULT, interpolate_times,
             use_multiprocessing)

        # Ensure that the summary edge cost dataframe looks as it should
        ec_cols = ['edge_cost', 'from_stop_id', 'to_stop_id']
        for c in ec_cols:
            assert c in summary_edge_costs.columns

        # Make sure that all edges are unique - there are no duplicates
        # in the returned edge dataframe (each should be its own summary)
        f = summary_edge_costs.from_stop_id
        t = summary_edge_costs.to_stop_id
        z = list(zip(f, t))
        assert len(list(set(z))) == len(z)

        # Ensure that the wait times dataframe looks as it should
        wt_cols = ['avg_cost', 'stop_id']
        for c in wt_cols:
            assert c in wait_times_by_stop.columns

        # Sanity check edge costs
        mask = (wait_times_by_stop.avg_cost < 0)
        assert len(wait_times_by_stop[mask]) == 0

        # Make sure that the stop ids are unique
        u = wait_times_by_stop.stop_id.unique()
        assert len(u) == len(wait_times_by_stop)

        # Another sanity check: make sure that the stops that were
        # assigned null values in the fixture dataset received a
        # linearly imputed arrival and departure time, and were thus
        # preserved as stops in the resulting edge list

        # First get the null times mask
        null_times = feed_1.stop_times.departure_time.isnull()
        # And identify all unique stops from the original feed
        null_stop_ids = feed_1.stop_times[null_times].stop_id.unique()

        # Now take this list of null stop ids and find all the ones
        # that appear in the summary edge dataframe
        mask = summary_edge_costs.from_stop_id.isin(null_stop_ids)
        # And now we can get the stop ids out from this list
        preserved_from_nulls = summary_edge_costs.from_stop_id[mask].unique()
        assert len(preserved_from_nulls) == 205
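
The zip/set comparison above works, but the same uniqueness assertion can be written directly against the dataframe. An equivalent sketch using pandas' duplicated(), assuming summary_edge_costs is a plain DataFrame:

# Flag any repeated (from_stop_id, to_stop_id) pair; asserting that
# none exist is equivalent to the zip/set length check above
dupes = summary_edge_costs.duplicated(subset=['from_stop_id', 'to_stop_id'])
assert not dupes.any()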
Example #3
def test_parsing_when_just_on_trip_during_target_window():
    path = fixture('highdesertpointorus-2018-03-20.zip')
    feed = get_representative_feed(path)

    start = 7 * 60 * 60  # 7:00 AM
    end = 8 * 60 * 60  # 8:00 AM
    G = load_feed_as_graph(feed, start, end)
    assert len(list(G.nodes())) == 2
    assert len(list(G.edges())) == 1
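
All of these tests express time windows as seconds after midnight (so 7 * 60 * 60 is 7:00 AM). A hypothetical convenience helper, if the repeated arithmetic is hard to read:

def seconds_after_midnight(hours, minutes=0):
    # Hypothetical helper: GTFS-style clock time as seconds after
    # midnight, e.g. seconds_after_midnight(7) == 7 * 60 * 60
    return hours * 3600 + minutes * 60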
Example #4
def test_loading_in_too_small_timeframes():
    path_1 = fixture('caltrain-2017-07-24.zip')
    feed_1 = get_representative_feed(path_1)

    # Loading in a time frame so narrow that it
    # will yield no valid results
    start = 0
    end = 1
    with pytest.raises(InsufficientSummaryResults):
        load_feed_as_graph(feed_1, start, end)
Example #5
def test_feed_to_graph_plot():
    path = fixture('caltrain-2017-07-24.zip')
    feed = get_representative_feed(path)

    start = 7 * 60 * 60
    end = 10 * 60 * 60

    G = load_feed_as_graph(feed, start, end)

    fig, ax = generate_plot(G)
Example #6
def test_save_and_read_zip():
    path_1 = fixture('caltrain-2017-07-24.zip')
    feed_1 = get_representative_feed(path_1)

    start = 7 * 60 * 60
    end = 10 * 60 * 60

    G1 = load_feed_as_graph(feed_1, start, end, 'foo')

    # Get counts as a measure to compare with save-read results
    nodes_len_g1 = len(list(G1.nodes()))
    edges_len_g1 = len(list(G1.edges()))

    # First save the graph to a zip
    zip_fpath = 'foobar.zip'
    save_graph_to_zip(G1, zip_fpath)

    # Then read in as a new graph
    G2 = graph_from_zip(zip_fpath)

    # Also immediately remove the zip file so it's not hanging
    # around or impacting later tests
    os.remove(zip_fpath)

    # Get new lengths
    nodes_len_g2 = len(list(G2.nodes()))
    edges_len_g2 = len(list(G2.edges()))

    # They should both be the same as the ones from G1
    assert nodes_len_g1 == nodes_len_g2
    assert edges_len_g1 == edges_len_g2

    # Make sure same numbers of unique nodes are present
    set_n1 = set(list(G1.nodes()))
    set_n2 = set(list(G2.nodes()))
    assert len(set_n1) == len(set_n2)

    # Make sure that all nodes are accounted for
    for n in set_n1:
        assert n in set_n2

    # Do the same for the edges
    e1 = list(G1.edges())
    e2 = list(G2.edges())
    for edge_pair in e1:
        assert edge_pair in e2

    # Also make sure the basic attributes are preserved
    for node_id, node in G2.nodes(data=True):
        for key in ['boarding_cost', 'modes', 'x', 'y']:
            assert key in node.keys()

    for from_id, to_id, edge in G2.edges(data=True):
        for key in ['length', 'mode']:
            assert key in edge.keys()
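
One design note on the test above: the zip is written to the working directory and removed with os.remove, so an assertion failure between the save and the remove would leave the file behind. A sketch of the same round-trip using pytest's built-in tmp_path fixture, which cleans up automatically:

def test_save_and_read_zip_tmp(tmp_path):
    # Sketch only: same save/read round-trip, but the zip lives in a
    # pytest-managed temporary directory, so no manual cleanup is needed
    feed = get_representative_feed(fixture('caltrain-2017-07-24.zip'))
    G1 = load_feed_as_graph(feed, 7 * 60 * 60, 10 * 60 * 60, 'foo')

    zip_fpath = str(tmp_path / 'foobar.zip')
    save_graph_to_zip(G1, zip_fpath)
    G2 = graph_from_zip(zip_fpath)

    assert len(G1.nodes()) == len(G2.nodes())
    assert len(G1.edges()) == len(G2.edges())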
Example #7
def test_feeds_with_no_direction_id():
    path = fixture('samtrans-2017-11-28.zip')
    feed = get_representative_feed(path)

    # Overwrite the direction id column in the trips df to be nan
    feed.trips['direction_id'] = np.nan

    start = 7 * 60 * 60
    end = 10 * 60 * 60
    G = load_feed_as_graph(feed, start, end)

    # Make sure each node has a numeric boarding cost
    for i, node in G.nodes(data=True):
        assert not np.isnan(node['boarding_cost'])
Example #8
def test_feed_to_graph_performance():
    # Replicate the original workflow of the graph creation path,
    # but opened up so each step is exposed to benchmarking/profiling
    start = 7 * 60 * 60
    end = 10 * 60 * 60
    interpolate_times = True
    use_multiprocessing = False

    print('Running time profiles on each major '
          'function in graph generation workflow')

    a = time()
    path = fixture('samtrans-2017-11-28.zip')
    feed = get_representative_feed(path)
    elapsed = round(time() - a, 2)
    print('Perf of get_representative_feed: {}s'.format(elapsed))

    fl = len(feed.routes)
    print('Iteration on {} routes.'.format(fl))

    a = time()
    (all_edge_costs,
     all_wait_times) = generate_edge_and_wait_values(feed, start, end,
                                                     interpolate_times,
                                                     use_multiprocessing)
    elapsed = round(time() - a, 2)
    print('Perf of generate_edge_and_wait_values: {}s'.format(elapsed))

    a = time()
    summary_edge_costs = generate_summary_edge_costs(all_edge_costs)
    elapsed = round(time() - a, 2)
    print('Perf of generate_summary_edge_costs: {}s'.format(elapsed))

    a = time()
    wait_times_by_stop = generate_summary_wait_times(
        all_wait_times, FALLBACK_STOP_COST_DEFAULT)
    elapsed = round(time() - a, 2)
    print('Perf of generate_summary_wait_times: {}s'.format(elapsed))

    a = time()
    G = generate_empty_md_graph('foo')
    elapsed = round(time() - a, 2)
    print('Perf of generate_empty_md_graph: {}s'.format(elapsed))

    a = time()
    G = populate_graph(G, 'bar', feed, wait_times_by_stop, summary_edge_costs,
                       50, 4.5)
    elapsed = round(time() - a, 2)
    print('Perf of populate_graph: {}s'.format(elapsed))
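
The a = time() / elapsed pattern repeats for every stage above; a small context manager could factor it out. A standard-library-only sketch:

from contextlib import contextmanager
from time import time

@contextmanager
def timed(label):
    # Hypothetical helper: time the wrapped block and print it in the
    # same format the test above uses
    a = time()
    try:
        yield
    finally:
        print('Perf of {}: {}s'.format(label, round(time() - a, 2)))

# Usage:
# with timed('get_representative_feed'):
#     feed = get_representative_feed(path)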
Example #9
def test_convert_multidigraph_to_digraph():
    path = fixture('samtrans-2017-11-28.zip')
    feed = get_representative_feed(path)

    # Use a shorter time window to speed up the test
    start = 7 * 60 * 60
    end = 8 * 60 * 60
    Gmdg = load_feed_as_graph(feed, start, end, name='foobar')

    # Run conversion operation
    Gdg = convert_to_digraph(Gmdg)

    assert isinstance(Gdg, nx.DiGraph)
    assert len(Gdg.edges()) == len(Gmdg.edges())
    assert len(Gdg.nodes()) == len(Gmdg.nodes())
Example #10
def test_loading_in_invalid_timeframes():
    path_1 = fixture('caltrain-2017-07-24.zip')
    feed_1 = get_representative_feed(path_1)

    # Loading in a timeframe where the
    # start comes after the end
    start = 500
    end = 100
    with pytest.raises(InvalidTimeBracket):
        load_feed_as_graph(feed_1, start, end)

    # Loading in a timeframe that is of length 0
    start = 0
    end = 0
    with pytest.raises(InvalidTimeBracket):
        load_feed_as_graph(feed_1, start, end)

    start = 1000
    end = 1000
    with pytest.raises(InvalidTimeBracket):
        load_feed_as_graph(feed_1, start, end)
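
The three invalid brackets above could also be collapsed into a single parametrized test. A sketch using pytest.mark.parametrize (the test name here is new, not from the original suite):

@pytest.mark.parametrize('start, end', [
    (500, 100),    # start after end
    (0, 0),        # zero-length window
    (1000, 1000),  # zero-length window at a nonzero offset
])
def test_invalid_timeframes_parametrized(start, end):
    feed = get_representative_feed(fixture('caltrain-2017-07-24.zip'))
    with pytest.raises(InvalidTimeBracket):
        load_feed_as_graph(feed, start, end)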
Example #11
def test_simplify_graph():
    path = fixture('samtrans-2017-11-28.zip')
    feed = get_representative_feed(path)

    # Use a shorter time window to speed up the test
    start = 7 * 60 * 60
    end = 8 * 60 * 60
    G = load_feed_as_graph(feed, start, end, name='foobar')

    # Run simplification
    Gs = simplify_graph(G)

    # TODO: We have this ongoing issue where we can't
    #       consistently test by index for edges, so we need
    #       to figure out _how_ to test for a specific edge
    assert len(Gs.nodes()) == 298
    assert len(Gs.edges()) == 451

    # Pull out a summary list of edges as dicts
    all_es = []
    for e_fr, e_to, edge in Gs.edges(data=True):
        edge['from'] = e_fr
        edge['to'] = e_to

        # Let's just look at those that have a larger
        # length associated with them and were coalesced from
        # other internal ways (so a geometry object is present)
        if edge['length'] > 110 and 'geometry' in edge.keys():
            all_es.append(edge)

    # Sort the list and pull the max out, where max is determined
    # based on the number of coordinates in the LineString
    target_edge = max(all_es, key=lambda x: len(x['geometry'].coords.xy[0]))
    assert target_edge['length'] == 5114.0
    assert target_edge['mode'] == 'transit'
    assert target_edge['from'] == 'foobar_351008'
    assert target_edge['to'] == 'foobar_334008'
    assert len(target_edge['geometry'].coords.xy[0]) == 49
Example #12
def test_extract_valid_feed():
    # Read in without a name or any
    # other optional arguments
    path = fixture('caltrain-2017-07-24.zip')
    feed = get_representative_feed(path)
    assert isinstance(feed, ptg.gtfs.feed)
Example #13
def test_empty_feed():
    path = fixture('empty.zip')
    with pytest.raises(InvalidGTFS):
        get_representative_feed(path)
Example #14
def test_feed_to_graph_path():
    path_1 = fixture('caltrain-2017-07-24.zip')
    feed_1 = get_representative_feed(path_1)

    start = 7 * 60 * 60
    end = 10 * 60 * 60

    G = load_feed_as_graph(feed_1, start, end, 'foo')

    # We assume that no route has a segment whose length exceeds a
    # given threshold (measured in seconds)
    max_reasonable_segment_length = 60 * 60
    _check_unreasonable_lengths(G, max_reasonable_segment_length)

    # Sanity check that the number of nodes and edges go up
    orig_node_len = len(G.nodes())
    orig_edge_len = len(G.edges())
    orig_node_list = list(G.nodes())

    path_2 = fixture('samtrans-2017-11-28.zip')
    feed_2 = get_representative_feed(path_2)
    G = load_feed_as_graph(feed_2, start, end, 'bar', G)

    assert isinstance(G, nx.MultiDiGraph)
    _check_unreasonable_lengths(G, max_reasonable_segment_length)

    # Part 2 of sanity check that the number of nodes and edges go up
    node_len_2 = len(G.nodes())
    edge_len_2 = len(G.edges())
    assert node_len_2 > orig_node_len
    assert edge_len_2 > orig_edge_len

    connector_edge_count = 0
    for from_node, to_node, edge in G.edges(data=True):
        # Make sure that a length measure has been calculated for each
        # edge in the resulting graph, also sanity check that all are
        # positive values
        assert 'length' in edge.keys()
        assert isinstance(edge['length'], float)
        assert edge['length'] >= 0

        # Also, we should make sure that edges were created that
        # connect the two feeds
        from_orig_a = from_node in orig_node_list
        from_orig_b = to_node in orig_node_list
        one_valid_fr = from_orig_a and (not from_orig_b)
        one_valid_to = (not from_orig_a) and from_orig_b
        if one_valid_fr or one_valid_to:
            connector_edge_count += 1

    # We know that there should be 9 new edges that are created to connect
    # the two GTFS feeds in the joint graph
    assert connector_edge_count == 9

    # Now reload in the synthetic graph geojson
    geojson_path = fixture('synthetic_san_bruno.geojson')
    with open(geojson_path, 'r') as gjf:
        reference_geojson = json.load(gjf)

    # Then load it onto the graph, as well
    G = load_synthetic_network_as_graph(reference_geojson, existing_graph=G)

    # And make sure it connected correctly
    node_len_3 = len(G.nodes())
    edge_len_3 = len(G.edges())
    assert node_len_3 - node_len_2 == 74
    assert edge_len_3 - edge_len_2 == 80
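
The _check_unreasonable_lengths helper used in this example is not shown. Judging only from how it is called, a plausible sketch is a simple sweep over edge costs (an assumption, not the suite's actual implementation):

def _check_unreasonable_lengths(G, max_len):
    # Assumed behavior: every edge's length (an edge cost in seconds)
    # should stay under the supplied threshold
    for _, _, edge in G.edges(data=True):
        assert edge['length'] <= max_len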