Example #1
0
def test_read_sleeping_giant_edgelist():
    """Check the columns and distance totals of the Sleeping Giant example edgelist."""
    df = read_edgelist(EDGELIST, keep_optional=True)

    # check that our Sleeping Giant example dataset contains the correct fields and values.
    # `[...] in ndarray` is an element-wise comparison reduced with any(), so it passes
    # as soon as one column happens to sit in the same position; test membership explicitly.
    expected_columns = {
        'node1', 'node2', 'trail', 'color', 'distance', 'estimate', 'required'
    }
    assert expected_columns.issubset(df.columns)
    assert math.isclose(df[df['required'] == 1]['distance'].sum(), 26.01)
    assert math.isclose(df['distance'].sum(), 30.48)

    # with optional edges dropped, only the required distance should remain
    df_req = read_edgelist(EDGELIST, keep_optional=False)
    assert math.isclose(df_req['distance'].sum(), 26.01)
    # NOTE(review): this checks a column literally named 'req', not 'required' -- confirm intent
    assert 'req' not in df_req.columns
Example #2
0
def test_create_networkx_graph_from_edgelist():
    """Build graphs from the example edgelist (with and without optional edges) and check their size."""
    full_df = read_edgelist(EDGELIST, keep_optional=True)
    full_graph = create_networkx_graph_from_edgelist(full_df, edge_id='id')

    # the graph holding every edge has the expected type, size and a known edge
    assert isinstance(full_graph, nx.MultiGraph)
    assert len(full_graph.edges()) == 133
    assert len(full_graph.nodes()) == 78
    sample_edge = full_graph['b_end_east']['b_y'][0]
    assert sample_edge['color'] == 'blue'
    assert sample_edge['trail'] == 'b'
    assert sample_edge['distance'] == 1.32

    # the graph restricted to required trails is smaller
    required_df = read_edgelist(EDGELIST, keep_optional=False)
    required_graph = create_networkx_graph_from_edgelist(required_df,
                                                         edge_id='id')
    assert isinstance(required_graph, nx.MultiGraph)
    assert len(required_graph.edges()) == 121
    assert len(required_graph.nodes()) == 74
Example #3
0
def rpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False):
    """
    Solve the Rural Postman Problem end to end: load the edgelist, build the graphs and compute the route.

    The graph made of required edges only is passed to `assert_graph_is_connected`, so the network must stay
    connected once the optional edges are removed.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute used as distance for the shortest paths between odd nodes
        verbose (boolean): when False, `logger_rpp` is disabled

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the route as produced by `create_eulerian_circuit`; each entry describes one step:
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The second item is the full graph (optional edges included), which is needed for visualization.
    """

    logger_rpp.disabled = not verbose

    logger_rpp.info('read edgelist')
    edge_df = read_edgelist(edgelist_filename, keep_optional=True)

    logger_rpp.info('create full and required graph')
    full_graph = create_networkx_graph_from_edgelist(edge_df)
    required_graph = create_required_graph(full_graph)
    assert_graph_is_connected(required_graph)

    logger_rpp.info('getting odd node pairs')
    pairs = list(itertools.combinations(get_odd_nodes(required_graph), 2))

    # odd nodes come from the required graph, but paths between them may use optional edges
    logger_rpp.info('get shortest paths between odd nodes')
    pair_distances = get_shortest_paths_distances(full_graph, pairs,
                                                  edge_weight)

    logger_rpp.info('Find min weight matching using blossom algorithm')
    complete_graph = create_complete_graph(pair_distances, flip_weights=True)
    raw_matching = nx.algorithms.max_weight_matching(complete_graph, True)
    matching = dedupe_matching(raw_matching)

    logger_rpp.info('add the min weight matching edges to g')
    augmented_graph = add_augmenting_path_to_graph(required_graph, matching)

    logger_rpp.info('get eulerian circuit route')
    route = list(
        create_eulerian_circuit(augmented_graph, full_graph, start_node))

    return route, full_graph
Example #4
0
def test_read_edgelist_w_ids(GRAPH_1_EDGELIST_W_ID_CSV):
    """Reading an edgelist that already has an 'id' column warns once and keeps the column."""
    with warnings.catch_warnings(record=True) as caught:
        df = read_edgelist(GRAPH_1_EDGELIST_W_ID_CSV)

        # exactly one warning, of the expected category and wording
        assert len(caught) == 1
        last_warning = caught[-1]
        assert issubclass(last_warning.category, UserWarning)
        assert "Edgelist contains field named 'id'" in str(
            last_warning.message)

    assert df.shape == (5, 4)
    assert set(df.columns) == {'distance', 'node1', 'node2', 'id'}
Example #5
0
def rpp(edgelist_filename,
        complete_g,
        start_node=None,
        edge_weight='distance',
        turn_weight_coefficient=1):
    """
    Solve the Rural Postman Problem end to end using turn-weighted edges.

    The required graph is passed to `assert_graph_is_strongly_connected`, so it must stay strongly connected once
    the optional edges are removed.

    Args:
        edgelist_filename (str): filename of edgelist.
        complete_g: graph handed to `sl.create_pos_and_add_to_graph` together with the full graph.
        start_node (str): name of starting node.  It is always passed through `str()`, so the default `None`
            reaches `create_eulerian_circuit` as the string 'None'.
        edge_weight (str): accepted for interface compatibility; not read by this function.
        turn_weight_coefficient (float): normalization coefficient used when adding turn weights to the full graph
    Returns:
        list[tuple(str, str, dict)]:
        The route as produced by `create_eulerian_circuit`; each entry describes one step:
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
    """

    edge_df = read_edgelist(edgelist_filename)

    # full graph: built from the edgelist, then given positions and turn weights
    full_graph = create_networkx_graph_from_edgelist(edge_df)
    full_graph, positions = sl.create_pos_and_add_to_graph(
        full_graph, complete_g)
    full_graph = create_turn_weight_edge_attr(
        full_graph,
        length_weight='distance',
        normalization_coefficient=turn_weight_coefficient)

    # required graph: drawn first, then checked for strong connectivity
    required_graph = create_required_graph(full_graph)
    sl.visualize_g_req(required_graph, positions)
    assert_graph_is_strongly_connected(required_graph)

    augmented_graph = sl.make_graph_eulerian(required_graph, full_graph)
    sl.is_graph_eulerian(augmented_graph)

    route = create_eulerian_circuit(
        augmented_graph,
        full_graph,
        str(start_node),
        edge_weight_name=turn_weight_function_distance)

    return list(route)
Example #6
0
def cpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False):
    """
    Solve the Chinese Postman Problem end to end: load the edgelist, build the graph and compute the route.
    Can be run from command line with arguments from cpp.py, or from an interactive Python session (ex jupyter notebook)

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute used as distance for the shortest paths between odd nodes
        verbose (boolean): when False, `logger_cpp` is disabled

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the route as produced by `create_eulerian_circuit`; each entry describes one step:
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The second item is the graph built from the edgelist, which is needed for visualization.
    """
    logger_cpp.disabled = not verbose

    logger_cpp.info('read edgelist and create base graph')
    edge_df = read_edgelist(edgelist_filename, keep_optional=False)
    base_graph = create_networkx_graph_from_edgelist(edge_df)

    logger_cpp.info('get augmenting path for odd nodes')
    pairs = list(itertools.combinations(get_odd_nodes(base_graph), 2))
    pair_distances = get_shortest_paths_distances(base_graph, pairs,
                                                  edge_weight)
    complete_graph = create_complete_graph(pair_distances, flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    raw_matching = nx.algorithms.max_weight_matching(complete_graph, True)
    matching = dedupe_matching(raw_matching)

    logger_cpp.info('add the min weight matching edges to g')
    augmented_graph = add_augmenting_path_to_graph(base_graph, matching)

    logger_cpp.info('get eulerian circuit route')
    route = list(
        create_eulerian_circuit(augmented_graph, base_graph, start_node))

    return route, base_graph
Example #7
0
def test_nodelist_edgelist_overlap():
    """
    Test that the nodelist and the edgelist contain the same node names.  If using X,Y coordinates for plotting and
    not all nodes have attributes, this could get messy.
    """
    eldf = read_edgelist(EDGELIST, keep_optional=True)
    nldf = pd.read_csv(NODELIST)
    # `Series.append` was removed in pandas 2.0; a plain set union yields the same node names
    edgelist_nodes = set(eldf['node1']) | set(eldf['node2'])
    nodelist_nodes = set(nldf['id'])

    nodes_in_el_but_not_nl = edgelist_nodes - nodelist_nodes
    assert nodes_in_el_but_not_nl == set(), \
        "Warning: The following nodes are in the edgelist, but not the nodelist: {}".format(nodes_in_el_but_not_nl)

    nodes_in_nl_but_not_el = nodelist_nodes - edgelist_nodes
    assert nodes_in_nl_but_not_el == set(), \
        "Warning: The following nodes are in the nodelist, but not the edgelist: {}".format(nodes_in_nl_but_not_el)
Example #8
0
def test_get_shortest_paths_distances():
    """Coarsely check the structure of the `get_shortest_paths_distances` return value."""
    df = read_edgelist(EDGELIST)
    graph = create_networkx_graph_from_edgelist(df, edge_id='id')

    odd_nodes = get_odd_nodes(graph)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # coarsely checking structure of `get_shortest_paths_distances` return value
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        graph, odd_node_pairs, 'distance')
    assert len(odd_node_pairs_shortest_paths) == 630
    assert isinstance(odd_node_pairs_shortest_paths, dict)

    # check that each node name appears the same number of times in `get_shortest_paths_distances` return value.
    # Iterating the dict yields its keys (the node pairs); flatten them into one list of names.
    node_names = list(
        itertools.chain.from_iterable(odd_node_pairs_shortest_paths))
    # the top-level `pd.value_counts` is deprecated; the Series method gives the same counts
    assert set(pd.Series(node_names).value_counts()) == {35}
def main():
    """
    Solve a route for every pair of odd-degree nodes and store the results in SQLite.

    The database tables are (re)created first.  For each pair of odd nodes `no_return_cpp` is run, the route and
    the statistics from `calculate_postman_solution_stats` are inserted, and finally the route ranks are added.
    """
    # Logging setup is loop-invariant, so do it once up front
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Connect to Sqlite3 & create table
    sqlite3_conn = dbfun.create_subway_sqlite3(clear_db=True)
    dbfun.add_stations_table_sqlite3(sqlite3_conn)
    dbfun.add_edges_table_sqlite3(sqlite3_conn)

    edgelist = './Data/Paths-Decision-Points.csv'

    el = ppg.read_edgelist(edgelist, keep_optional=False)
    g = ppg.create_networkx_graph_from_edgelist(el)

    odd_nodes = ppg.get_odd_nodes(g)

    # This for loop gets all the euler paths for every combination of start and end nodes,
    # saves the routes/statistics as a dictionary, and inserts it into the database
    for first_node, second_node in itertools.combinations(odd_nodes, 2):
        circuit_name = first_node + ' - ' + second_node

        path_stats = {'path': circuit_name}

        # For some reason, the no_return_cpp is returning the path backwards so the end_node is passed as the start
        # (the returned graph is not needed here)
        circuit, _ = no_return_cpp(edgelist, second_node, first_node)

        # Only report success once the solver has actually returned
        logger.info(f'Solved CPP for {circuit_name}')

        # Formats the route and adds it to the dictionary along with the other stats
        route = '-'.join([edge[0] for edge in circuit])
        route = route + '-' + second_node
        path_stats.update(calculate_postman_solution_stats(circuit))
        path_stats['route'] = route

        # Inserts into Sqlite3
        dbfun.insert_into_sqlite3(sqlite3_conn, path_stats)

    # Add rankings
    dbfun.add_route_ranks(sqlite3_conn)
Example #10
0
def test_add_node_attributes():
    """Node attributes from the nodelist end up on every node of the graph."""
    # create objects for testing
    edge_df = read_edgelist(EDGELIST)
    base_graph = create_networkx_graph_from_edgelist(edge_df, edge_id='id')
    graph_node_attrs = add_node_attributes(base_graph, pd.read_csv(NODELIST))

    assert len(graph_node_attrs.nodes()) == 74

    # every node carries both coordinates
    for _, attrs in graph_node_attrs.nodes(data=True):
        assert 'X' in attrs
        assert 'Y' in attrs

    # spot check the coordinates of one known node
    node_records = list(graph_node_attrs.nodes(data=True))
    names = [record[0] for record in node_records]
    assert 'rs_end_north' in names

    north_attrs = node_records[names.index('rs_end_north')][1]
    assert north_attrs['X'] == 1772
    assert north_attrs['Y'] == 172
Example #11
0
def test_read_edgelist(GRAPH_1_EDGELIST_CSV):
    """A plain edgelist is read as 5 rows with exactly the three expected columns."""
    edge_df = read_edgelist(GRAPH_1_EDGELIST_CSV)
    n_rows, n_cols = edge_df.shape
    assert (n_rows, n_cols) == (5, 3)
    assert set(edge_df.columns) == {'distance', 'node1', 'node2'}
Example #12
0
def _cpp_shared_edge_keys(g):
    """Return the attribute names that are present on every edge of `g`."""
    return set.intersection(
        *[set(attrs.keys()) for _, _, attrs in g.edges(data=True)])


def _cpp_edge_ids_need_reset(g, shared_keys):
    """Return True when some edge of `g` lacks an 'id' or the ids are not unique (warns in the latter case)."""
    if 'id' not in shared_keys:
        return True

    # id is already specified - ensure that it is unique
    unique_ids = {edg[3]['id'] for edg in g.edges(keys=True, data=True)}
    if len(unique_ids) != g.number_of_edges():
        # stacklevel=2 attributes the warning to the calling solver rather than this helper
        warnings.warn(
            "Edgelist contains field named 'id' but the values provided are not unique. "
            "Replacing id field with uniquely defined values.",
            stacklevel=2)
        return True
    return False


def cpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=0,
        g=None):
    """
    Solving the CPP from beginning (load network data) to end (finding optimal route).
    Can be run from command line with arguments from cpp.py, or from an interactive Python session (ex jupyter notebook)

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node; converted with `str()` when given.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?  Timing/progress lines are printed regardless of this flag.
        graphml (boolean): is edgelist filename in graphml format?
        max_distance (double): NOT IMPLEMENTED
        max_degree_connect (int): forwarded to `read_graphml` when `graphml` is True; otherwise unused.
        g (networkx multigraph): pre-loaded networkx MultiGraph. Either g or edgelist_filename must be specified. If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        Each tuple is a direction (from one node to another) from the CPP solution route.
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The route has its leading edges trimmed (see the end of the function body).
        The original graph is returned as well.  This is needed for visualization

    Raises:
        TypeError: if neither `edgelist_filename` nor `g` is given.
        ValueError: if a pre-loaded `g` has an edge without the `edge_weight` attribute.
    """
    logger_cpp.disabled = not verbose

    reset_ids = False

    logger_cpp.info('initialize graph')
    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            g = read_graphml(edgelist_filename,
                             edge_weight=edge_weight,
                             max_degree_connect=max_degree_connect)

            # make sure edge id exists and is unique
            reset_ids = _cpp_edge_ids_need_reset(g, _cpp_shared_edge_keys(g))
        else:
            el = read_edgelist(edgelist_filename, keep_optional=False)
            g = create_networkx_graph_from_edgelist(el)
    elif g is None:
        # none of edgelist filename or g is given - no graph specified
        raise TypeError("One of edgelist_filename or g must be given!")
    else:
        # use g - must ensure that format matches the expected format
        g = nx.MultiGraph(g)
        # check for all needed fields - if id is not set it will be set manually
        shared_keys = _cpp_shared_edge_keys(g)
        if edge_weight not in shared_keys:
            raise ValueError(
                "g must include value for '{}' for every edge".format(
                    edge_weight))
        reset_ids = _cpp_edge_ids_need_reset(g, shared_keys)

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g.edges(keys=True)):
            g.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    logger_cpp.info('get augmenting path for odd nodes')
    odd_nodes = get_odd_nodes(g)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # 'x' and 'y' is not in the generated graphml file, so this filtering is not supported until x and y is added
    # odd_node_pairs = filter_by_haversine_distance(g, odd_node_pairs, max_distance=max_distance)

    start = time.time()
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g, odd_node_pairs, edge_weight)
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_cpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g, odd_matching)

    print(len(get_odd_nodes(g)), ' odd nodes, now', len(get_odd_nodes(g_aug)),
          nx.is_connected(g_aug))
    logger_cpp.info('get eulerian circuit route')

    circuit = list(create_eulerian_circuit(g_aug, g, start_node))
    end = time.time()
    print('matching and augment time:', end - start)

    # Trim the circuit (we dont care about the "full circuit"): walk inwards from both ends while the idx-th edge
    # from the front shares a node with the idx-th edge from the back, and stop at the first pair that does not.
    # `idx` is initialised so that an empty circuit no longer raises NameError below.
    idx = 0
    for idx in range(len(circuit)):
        front_edge = circuit[idx]
        back_edge = circuit[len(circuit) - 1 - idx]
        back_nodes = (back_edge[0], back_edge[1])
        if front_edge[0] not in back_nodes and front_edge[1] not in back_nodes:
            break

    # NOTE(review): this drops idx + 1 edges from the FRONT while the message below reports idx, and the
    # original comment talked about trimming from the back -- behaviour kept as is, confirm intent.
    circuit = circuit[idx + 1:]
    print('Removed', idx, 'edges from the circuit start')

    return circuit, g
Example #13
0
def _rpp_shared_edge_keys(g_full):
    """Return the attribute names that are present on every edge of `g_full`."""
    return set.intersection(
        *[set(attrs.keys()) for _, _, attrs in g_full.edges(data=True)])


def _rpp_edge_ids_need_reset(g_full, shared_keys):
    """Return True when some edge of `g_full` lacks an 'id' or the ids are not unique (warns in the latter case)."""
    if 'id' not in shared_keys:
        # not every edge has a defined edge id - a new one must be created
        return True

    # id is already specified - ensure that it is unique
    unique_ids = {edg[3]['id'] for edg in g_full.edges(keys=True, data=True)}
    if len(unique_ids) != g_full.number_of_edges():
        # stacklevel=2 attributes the warning to the calling solver rather than this helper
        warnings.warn(
            "Edgelist contains field named 'id' but the values provided are not unique. "
            "Replacing id field with uniquely defined values.",
            stacklevel=2)
        return True
    return False


def rpp(edgelist_filename=None,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=None,
        g_full=None):
    """
    Solving the RPP from beginning (load network data) to end (finding optimal route).  If the graph of required
    edges is not connected, `make_connected` is called with the full graph to connect it before the route is
    computed.  This class of RPP generalizes to the CPP strategy.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str or can be cast to str): name of starting node.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?  Timing/progress lines are printed regardless of this flag.
        graphml (boolean): is edgelist filename in graphml format?
        max_distance (double): NOT IMPLEMENTED
        max_degree_connect (int): min degree of a node in the full graph -- nodes with smaller degree are connected with all-to-all optional edges. Use -1 for all-to-all graph.
        g_full (networkx multigraph): pre-loaded networkx MultiGraph. Either g_full or edgelist_filename must be specified. If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        Each tuple is a direction (from one node to another) from the CPP solution route.
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The route has its leading edges trimmed (see the end of the function body).
        The original graph is returned as well.  This is needed for visualization

    Raises:
        TypeError: if neither `edgelist_filename` nor `g_full` is given.
        ValueError: if a pre-loaded `g_full` has an edge without 'required' or the `edge_weight` attribute.
    """

    print("Running RPP solver!")

    logger_rpp.disabled = not verbose
    logger_rpp.info('initialize full graph')

    reset_ids = False

    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            # read in the graph
            g_full = read_graphml(edgelist_filename, edge_weight,
                                  max_degree_connect)

            # make sure edge id exists and is unique
            reset_ids = _rpp_edge_ids_need_reset(
                g_full, _rpp_shared_edge_keys(g_full))
        else:
            # regular csv file format...
            el = read_edgelist(edgelist_filename, keep_optional=True)
            g_full = create_networkx_graph_from_edgelist(el)
    elif g_full is None:
        # none of edgelist filename or g_full is given - no graph specified
        raise TypeError("One of edgelist_filename or g_full must be given!")
    else:
        # use g_full - must ensure that format matches the expected format
        g_full = nx.MultiGraph(g_full)
        # check for all needed fields - if id is not set it will be set manually
        shared_keys = _rpp_shared_edge_keys(g_full)
        if not {'required', edge_weight} <= shared_keys:
            raise ValueError(
                "g_full must include values for 'required' and '{}' for every edge"
                .format(edge_weight))
        reset_ids = _rpp_edge_ids_need_reset(g_full, shared_keys)

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g_full.edges(keys=True)):
            g_full.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    # if required graph is not connected, use additional edges from g_full to make it connected
    logger_rpp.info('create required graph')
    g_req = create_required_graph(g_full)
    if not is_connected(g_req):
        make_connected(g_req, g_full, edge_weight)  # THIS STEP COULD BE SLOW

    logger_rpp.info('getting odd node pairs')
    odd_nodes = get_odd_nodes(g_req)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    start = time.time()
    logger_rpp.info('get shortest paths between odd nodes')
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g_full, odd_node_pairs, edge_weight)

    logger_rpp.info('Find min weight matching using blossom algorithm')
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_rpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g_req, odd_matching)

    logger_rpp.info('get eulerian circuit route')

    circuit = list(
        create_eulerian_circuit(g_aug,
                                g_full,
                                start_node,
                                edge_weight=edge_weight))
    end = time.time()
    print('matching and augment time:', end - start)

    # Trim the circuit (we dont care about the "full circuit"): walk inwards from both ends while the idx-th edge
    # from the front shares a node with the idx-th edge from the back, and stop at the first pair that does not.
    # `idx` is initialised so that an empty circuit no longer raises NameError below.
    idx = 0
    for idx in range(len(circuit)):
        front_edge = circuit[idx]
        back_edge = circuit[len(circuit) - 1 - idx]
        back_nodes = (back_edge[0], back_edge[1])
        if front_edge[0] not in back_nodes and front_edge[1] not in back_nodes:
            break

    # NOTE(review): this drops idx + 1 edges from the FRONT while the message below reports idx, and the
    # original comment talked about trimming from the back -- behaviour kept as is, confirm intent.
    circuit = circuit[idx + 1:]
    print('Removed', idx, 'edges from the circuit start')

    return circuit, g_full