예제 #1
0
def simplify(streckennetz: nx.MultiGraph) -> nx.MultiGraph:
    """Prune parallel edges and dead-end nodes from ``streckennetz`` in place.

    Two passes are made over the graph:

    1. For every pair of nodes joined by more than one edge, the edge with
       the smallest ``'length'`` attribute is removed (one edge per pair;
       ties resolve to the first such edge key in iteration order).
    2. Nodes with a degree below 2 that carry no ``'type'`` attribute are
       removed repeatedly until none are left, because removing one dead end
       can turn its neighbour into a new dead end.

    :param streckennetz: Graph to simplify; it is modified in place. Every
        parallel edge must carry a ``'length'`` attribute.
    :return: The same graph object after simplification.
    """
    print('simplifying...')

    # Remove the shorter edge of two parallel edges.
    # edges() reports each parallel edge separately, so the node pairs are
    # de-duplicated (order preserved) to inspect and count every pair once.
    edges_to_remove = []
    for u, v in dict.fromkeys(streckennetz.edges()):
        parallel = streckennetz[u][v]
        if len(parallel) > 1:
            # Do not assume edge key 0 exists; pick the minimum over all keys.
            shortest_key = min(parallel,
                               key=lambda key: parallel[key]['length'])
            edges_to_remove.append((u, v, shortest_key))
    print('found', len(edges_to_remove), 'parallel edges')
    streckennetz.remove_edges_from(edges_to_remove)

    # Remove nodes that have fewer than two edges and no 'type' attribute.
    while True:
        nodes_to_remove = [
            node for node in streckennetz.nodes()
            if streckennetz.degree(node) < 2
            and 'type' not in streckennetz.nodes[node]
        ]
        if not nodes_to_remove:
            break
        print('found', len(nodes_to_remove), 'deadend nodes')
        streckennetz.remove_nodes_from(nodes_to_remove)
    return streckennetz
예제 #2
0
    def _get_fbvs_max_size(self, g: MultiGraph, k: int) -> set:
        """Randomised search for a feedback vertex set of ``g`` with budget ``k``.

        The reductions are applied first; they return the remaining budget
        and a set ``x0`` (presumably the vertices the reductions already
        committed to the solution - confirm against ``apply_reductions``).
        A random endpoint of a random edge is then added to the solution and
        the search recurses on the graph without that vertex.

        :param g: The multigraph to search.
        :param k: Maximum number of vertices the solution may still use.
        :return: A feedback vertex set, or ``None`` when the budget is
            exhausted on this random branch.
        """
        # Exhaustively apply reductions
        k, x0 = self.apply_reductions(g, k)

        # Originally reduction 5: if k < 0, terminate the algorithm and conclude that
        # (G, k) is a no-instance.
        if k < 0:
            return None

        # If G is an empty graph, then we return soln_redux
        if len(g) == 0:
            return x0

        # Edge views cannot be indexed by an integer, which random.choice
        # requires, so materialise the edges into a list first.
        edges = list(g.edges())

        # A graph without edges has no cycles, so nothing beyond x0 has to be
        # removed (and choice() would fail on an empty sequence).
        if not edges:
            return x0

        # Pick a random edge, then a random end node of that edge
        rand_edge = choice(edges)
        v = choice(rand_edge)

        # We recurse on (G - v, k − 1).
        xn = self._get_fbvs_max_size(graph_minus(g, {v}), k - 1)

        # If the recursive step returns a failure, then we return a failure as well.
        if xn is None:
            return None

        # If the recursive step returns a feedback vertex set Xn, then we return X = Xn ∪ {v} ∪ X0.
        return xn.union({v}).union(x0)
예제 #3
0
    def _get_fbvs_max_size(self, g: MultiGraph, k: int) -> set:
        """Randomised search for a feedback vertex set of ``g`` with budget ``k``.

        The reductions are applied first; they return the remaining budget
        and a set ``x0`` (presumably the vertices the reductions already
        committed to the solution - confirm against ``apply_reductions``).
        A random endpoint of a random edge is then added to the solution and
        the search recurses on the graph without that vertex.

        :param g: The multigraph to search.
        :param k: Maximum number of vertices the solution may still use.
        :return: A feedback vertex set, or ``None`` when the budget is
            exhausted on this random branch.
        """
        # Exhaustively apply reductions
        k, x0 = self.apply_reductions(g, k)

        # Originally reduction 5: if k < 0, terminate the algorithm and conclude that
        # (G, k) is a no-instance.
        if k < 0:
            return None

        # If G is an empty graph, then we return soln_redux
        if len(g) == 0:
            return x0

        # Edge views cannot be indexed by an integer, which random.choice
        # requires, so materialise the edges into a list first.
        edges = list(g.edges())

        # A graph without edges has no cycles, so nothing beyond x0 has to be
        # removed (and choice() would fail on an empty sequence).
        if not edges:
            return x0

        # Pick a random edge, then a random end node of that edge
        rand_edge = choice(edges)
        v = choice(rand_edge)

        # We recurse on (G - v, k − 1).
        xn = self._get_fbvs_max_size(graph_minus(g, {v}), k - 1)

        # If the recursive step returns a failure, then we return a failure as well.
        if xn is None:
            return None

        # If the recursive step returns a feedback vertex set Xn, then we return X = Xn ∪ {v} ∪ X0.
        return xn.union({v}).union(x0)
예제 #4
0
def test_relabel_nodes_multigraph():
    """Regression test: this failed after switching to dg.relabel_nodes.

    Relabelling a multigraph in place must keep both parallel edges.
    """
    renaming = {'a': 'aardvark', 'b': 'bear'}
    multigraph = MultiGraph([('a', 'b'), ('a', 'b')])
    multigraph = relabel_nodes(multigraph, renaming, copy=False)

    expected_nodes = ['aardvark', 'bear']
    expected_edges = [('aardvark', 'bear'), ('aardvark', 'bear')]
    assert sorted(multigraph.nodes()) == expected_nodes
    assert_edges_equal(sorted(multigraph.edges()), expected_edges)
예제 #5
0
def graph_minus(g: MultiGraph, w: set) -> MultiGraph:
	"""Return a new multigraph equal to ``g`` with the nodes in ``w`` removed.

	Every edge of ``g`` whose two endpoints both lie outside ``w`` is
	re-created (parallel edges stay parallel), followed by every remaining
	node so that isolated nodes survive as well. Only the structure is
	copied: edge keys, edge attributes and node attributes are not carried
	over. ``g`` itself is not modified.
	"""
	remainder = MultiGraph()
	# Edges first, exactly like the node insertion order callers observe.
	for first, second in g.edges():
		if first in w or second in w:
			continue
		remainder.add_edge(first, second)
	# Then the nodes, so nodes without surviving edges are kept too.
	for node in g.nodes():
		if node in w:
			continue
		remainder.add_node(node)
	return remainder
예제 #6
0
파일: utils.py 프로젝트: oStritze/sna
def generate_snapshots_over_time(G: nx.MultiGraph,
                                 minutes=0,
                                 hours=0,
                                 days=0,
                                 max_snapshots=None,
                                 interval=None,
                                 include_final=False,
                                 cummulative=True):
    """
    Build reduced snapshots of ``G`` at regular time steps.

    :param G: MultiGraph you want to watch over time, with "created_at" edge attribute
    :param minutes: Number of minutes between two snapshots (Default 0)
    :param hours: Number of hours between two snapshots (Default 0)
    :param days: Number of days between two snapshots (Default 0)
    :param max_snapshots: Maximum number of generated snapshots (Default None - all snapshots are created)
    :param interval: Tuple (start, end) specifying in which interval to generate snapshots (Default None - takes min, max created_at date of G)
    :param include_final: If True, set G as value for max_date (Default False)
    :param cummulative: If True, a snapshot holds every edge created up to its timestamp;
        if False, only the edges created between its timestamp and one step later,
        both bounds inclusive (Default True)
    :return: A dict mapping each snapshot timestamp to the result of reduce_multi_graph
        for the edges selected for that timestamp
    :raises ValueError: If no edge has a "created_at" attribute, or if the step is
        negative or zero
    """
    if nx.get_edge_attributes(G, "created_at") == {}:
        raise ValueError("Graph needs 'created_at' edge attribute")
    if minutes < 0 or hours < 0 or days < 0 or minutes + hours + days <= 0:
        raise ValueError("Illegal minutes, hours or days values")

    step = timedelta(minutes=minutes, hours=hours, days=days)

    # Parse every timestamp exactly once instead of once per snapshot.
    timed_edges = [(a, b, str_to_datetime(attr["created_at"]))
                   for (a, b, attr) in G.edges(data=True)]

    if interval is None:
        # Compare parsed datetimes, not the raw strings: string order only
        # matches chronological order for some timestamp formats.
        created_ats = [created for (_, _, created) in timed_edges]
        date = min(created_ats)
        max_date = max(created_ats)
    else:
        date = str_to_datetime(interval[0])
        max_date = str_to_datetime(interval[1])

    snapshots = {}
    while date <= max_date and (max_snapshots is None
                                or len(snapshots) < max_snapshots):
        if cummulative:
            edges_snapshot = [(a, b) for (a, b, created) in timed_edges
                              if created <= date]
        else:
            window_end = date + step
            edges_snapshot = [(a, b) for (a, b, created) in timed_edges
                              if date <= created <= window_end]

        # Build the snapshot from the selected edges only. A node-induced
        # subgraph of G would pull back in every edge between the selected
        # nodes, including edges created outside the requested time range.
        G_snapshot = nx.MultiGraph()
        G_snapshot.add_edges_from(edges_snapshot)
        snapshots[date] = reduce_multi_graph(G_snapshot)

        date += step

    if include_final:
        snapshots[max_date] = reduce_multi_graph(G)
    return snapshots
예제 #7
0
파일: utils.py 프로젝트: oStritze/sna
def reduce_multi_graph(graph: nx.MultiGraph, thresh=0):
    """
    Collapse a multi graph into a non-multi graph.

    Parallel edges between two nodes are merged into one edge whose "weight"
    attribute holds how many edges connected that pair. Pairs with fewer than
    ``thresh`` edges are dropped. The result is built from the edge list
    only, so nodes without a remaining edge do not appear in it.

    :return: The graph produced by nx.from_pandas_edgelist from the counted pairs
    """
    # Sorting each pair makes (u, v) and (v, u) count as the same connection.
    pair_counts = Counter(tuple(sorted(pair)) for pair in graph.edges())
    rows = [(first, second, count)
            for (first, second), count in pair_counts.items()]
    weighted = pd.DataFrame(rows, columns=["user1", "user2", "weight"])
    heavy_enough = weighted[weighted["weight"] >= thresh]
    return nx.from_pandas_edgelist(heavy_enough,
                                   source="user1",
                                   target="user2",
                                   edge_attr=True)
예제 #8
0
 def get_edge_attr_feature_matrix(self, G:nx.MultiGraph, doc,
                                  embd_dim=96, embedding_type=None, as_torch=True, is_padding_pos=True, **kwargs):
     """Build the edge feature matrix, nominally of shape [num_edges, edge_feat_dim].

     Every attribute value of every edge is looked up among the token texts
     of ``doc`` and its vector becomes one row, so an edge with several
     attributes contributes several rows. A mismatch with the expected
     shape is only logged, never raised.

     :param G: Graph whose edge attribute values are token texts found in ``doc``.
     :param doc: Iterable of tokens exposing ``.text`` and ``.vector``; when a
         text occurs more than once the last vector wins.
     :param embd_dim: Width each token vector is reshaped to.
     :param embedding_type: Not used by this method.
     :param as_torch: Return a float ``torch`` tensor instead of a numpy array.
     :param is_padding_pos: Append three zero columns to every row.
     :param kwargs: Only ``is_cuda`` is read; when truthy the tensor is moved
         to CUDA if available.
     :return: The stacked feature matrix; shape (0, edge_feat_dim) when the
         graph provides no edge attribute values.
     :raises KeyError: If an edge attribute value is not the text of any token.
     """
     assert G is not None
     EDV = G.edges(data=True)
     E = len(EDV)
     M = (embd_dim+3) if is_padding_pos else embd_dim
     # RS N.b.: Gt side token 'pos' are captured in node attr, duplicates are overridden
     token2vec = {token.text: token.vector for token in doc}
     feat_mats = []
     for _, _, feat_dict in EDV:
         for value in feat_dict.values():
             v_embd = token2vec[value].reshape((-1, embd_dim))
             if is_padding_pos:
                 v_embd = np.pad(v_embd, ((0,0), (0, 3)), 'constant', constant_values=(0, 0.0))  # [1, embd_dim+3]
             feat_mats.append(v_embd)
     if len(feat_mats) > 1:
         # One stacking call instead of re-copying the growing matrix per row.
         feat_mats = np.vstack(feat_mats)
     elif feat_mats:
         # Ensure a single edge is still a 2-D, one-row matrix.
         feat_mats = feat_mats[0].reshape((1, -1))
     else:
         # No edge attribute values at all: return an empty matrix rather
         # than failing on feat_mats[0].
         feat_mats = np.zeros((0, M))
     if as_torch:
         feat_mats = torch.from_numpy(feat_mats).float()
         if kwargs.get('is_cuda'):
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
             feat_mats = feat_mats.to(device)
     # Explicit check instead of assert, so the diagnostic survives python -O.
     if feat_mats.shape != (E, M):
         ## RuntimeError: Edge indices and edge attributes hold a differing number of edges,
         logger.debug(f"expected shape = {(E, M)}\n feat_mats.shape = {feat_mats.shape}")
     return feat_mats
예제 #9
0
def subgraph_by_timestamp(mg: nx.MultiGraph, start: int, end: int) -> nx.Graph:
    """Collapse the edges of ``mg`` inside a time window into a simple graph.

    The result contains every node of ``mg`` (without node attributes) and
    one edge per node pair that has at least one multigraph edge whose
    "timestamp" lies in the half-open range ``[start, end)``. The "weight"
    of that edge is the sum of the "weight" values of those multigraph edges.
    """
    g = nx.Graph()
    g.add_nodes_from(mg.nodes())

    for u, v, data in mg.edges(data=True):
        if not (start <= data["timestamp"] < end):
            continue
        if not g.has_edge(u, v):
            g.add_edge(u, v, weight=data["weight"])
        else:
            g[u][v]["weight"] += data["weight"]
    return g
예제 #10
0
def get_channel_with_minimal_fee_base(subgraph: nx.MultiGraph, source, target):
    """
    Pick the channel with the lowest base fee between two nodes.

    The given sub-graph must consist of exactly the two nodes 'source' and
    'target'. For every channel (edge) between them, the 'fee_base_msat' of
    the policy belonging to the target side is inspected, and the first
    channel with the strictly smallest value wins.

    :param subgraph: The MultiGraph containing only the nodes source and target.
    :param source: The source node.
    :param target: The target node.
    :return: The chosen channel as a tuple (source pub-key, target pub-key, channel id).
    """
    assert set(subgraph.nodes) == {source, target}, \
        "BAD USAGE - you should give a graph containing only the two given nodes"

    cheapest_fee: float = float('inf')
    cheapest_channel = None

    for _, _, channel in subgraph.edges(data=True):
        # Work out on which side of the channel record the source node sits.
        if source == channel['node1_pub']:
            source_pub_key, target_pub_key = 'node1_pub', 'node2_pub'
            target_policy_key = 'node2_policy'
        elif source == channel['node2_pub']:
            source_pub_key, target_pub_key = 'node2_pub', 'node1_pub'
            target_policy_key = 'node1_policy'
        else:
            assert False, "WTF? Neither 'source' nor 'target' are in the channel."

        # The fee is taken from the target's policy: the target node keeps part
        # of the transferred money for itself and passes the rest forwards.
        fee: float = channel[target_policy_key]['fee_base_msat']

        if fee < cheapest_fee:
            cheapest_fee = fee
            cheapest_channel = (channel[source_pub_key],
                                channel[target_pub_key],
                                channel['channel_id'])

    assert (cheapest_channel is not None) and (
        cheapest_fee != float('inf')), "ERROR: no channel was chosen."

    return cheapest_channel
예제 #11
0
def get_most_important_journal(graph:nwx.MultiGraph):
    '''
    Get the journal whose edges touch the largest number of distinct nodes.

    Only edges carrying a 'journal' attribute are considered, and nodes equal
    to the empty string are not counted. If several journals share the top
    count, only the one encountered first among the edges is returned.

    @parameter : graph - graph whose edges may carry a 'journal' attribute
    @return : tuple (journal, number of distinct non-empty nodes)
    @raises : ValueError when no edge has a 'journal' attribute
    '''
    # journal -> set of nodes touched by its edges (first-seen journal order).
    journal_nodes = {}
    for source, target, attributes in graph.edges(data=True):
        if 'journal' in attributes:
            nodes = journal_nodes.setdefault(attributes['journal'], set())
            nodes.update((source, target))

    count_journal = {
        journal: sum(1 for node in nodes if node != "")
        for journal, nodes in journal_nodes.items()
    }

    # max() keeps the first maximal entry, so ties resolve to the journal seen first.
    return max(count_journal.items(), key=operator.itemgetter(1))
예제 #12
0
def graph_from_dataframe(
    dataframe,
    threshold_by_percent_unique=0.1,
    threshold_by_count_unique=None,
    node_id_columns=[],
    node_property_columns=[],
    edge_property_columns=[],
    node_type_key="type",
    edge_type_key="type",
    collapse_edges=True,
    edge_agg_key="weight",
):
    """
    Build an undirected graph from a pandas dataframe.

    This function attempts to infer which cells should become nodes
    based on either:

        a. what percentage of the column are unique values (defaults to 10%)
        b. an explicit count of unique values (i.e. any column with 7 unique
           values or less)
        c. an explicit list of column keys (i.e.
           ['employee_id', 'location_code'])

    Column headers are preserved as node and edge 'types'. By default, this is
    stored using the key 'type' which is used by some graph import processes
    but can be reconfigured.

    This function uses a MultiGraph structure during the build phase so that it
    is possible to make multiple connections between nodes. By default, at the
    end of the build phase, the MultiGraph is converted to a Graph and the
    count of edges between each node-pair is written as a 'weight' property.

    :param pandas.DataFrame dataframe: A pandas dataframe containing the data
        to be converted into a graph.
    :param float threshold_by_percent_unique: A percent value used to determine
        whether a column should be used to generate nodes based on its
        cardinality (i.e. in a dataframe with 100 rows, treat any column with
        10 or less unique values as a node)
    :param int threshold_by_count_unique: A numeric value used to determine
        whether a column should be used to generate nodes based on its
        cardinality (i.e. if 7 is supplied, treat any column with 7 or less
        unique values as a node) - supplying a value will take priority over
        percent_unique
    :param list node_id_columns: A list of column headers to use for generating
        nodes. Supplying any value will take precedence over
        threshold_by_percent_unique or threshold_by_count_unique. Note: this
        can cause the size of the graph to expand significantly since every
        unique value in a column will become a node.
    :param list node_property_columns: A list of column headers to use for
        generating properties of nodes. These can include the same column
        headers used for the node id.
    :param list edge_property_columns: A list of column headers to use for
        generating properties of edges.
    :param str node_type_key: A string that sets the key that will be used to
        preserve the column name as node property (this is useful for importing
        networkx graphs to databases that distinguish between node 'types' or
        for visually encoding those types in plots).
    :param str edge_type_key: A string that sets the key that will be used to
        keep track of edge relationships as 'types' (this is useful for
        importing networkx graphs to databases that distinguish between edge
        'types' or for visually encoding those types in plots). Edge type
        values are automatically set to the two node column names, sorted and
        joined with an underscore (<column_a>_<column_b>).
    :param bool collapse_edges: Graphs are instantiated as a 'MultiGraph'
        (allow multiple edges between nodes) and then collapsed into a 'Graph'
        which only has a single edge between any two nodes. Information is
        preserved by aggregating the count of those edges as a 'weight' value.
        Set this value to False to return the MultiGraph. Note: this can cause
        the size of the graph to expand significantly since each row adds one
        edge for every pair of node columns.
    :param str edge_agg_key: A string that sets the key the edge count will be
        assigned to when edges are aggregated. If the edges already carry a
        value under this key, those values are summed instead of counted.
    :returns: A networkx Graph (or MultiGraph if collapse_edges is set to
        False).
    """

    assert isinstance(
        dataframe,
        pd.DataFrame), "{} is not a pandas DataFrame".format(dataframe)

    M = MultiGraph()

    # if explicit specification of node_id_columns is provided, use those
    if len(node_id_columns) > 0:
        node_columns = node_id_columns
    elif threshold_by_count_unique:
        # otherwise, compute with thresholds based on the dataframe
        node_columns = sorted([
            col for col in dataframe.columns
            if dataframe[col].nunique() <= threshold_by_count_unique
        ])
    else:
        # An empty dataframe has no ratio to compute (division by zero); it
        # yields no node columns and therefore an empty graph.
        row_count = dataframe.shape[0]
        node_columns = sorted([
            col for col in dataframe.columns
            if row_count and
            dataframe[col].nunique() / row_count <= threshold_by_percent_unique
        ])

    # use the unique values for each node column as node types
    for node_type in node_columns:
        M.add_nodes_from([(node, {
            node_type_key: node_type
        }) for node in dataframe[node_type].unique()])

    # iterate over the rows and generate an edge for each pair of node columns
    for i, row in dataframe.iterrows():
        # assemble the edge properties as a dictionary
        edge_properties = {k: row[k] for k in edge_property_columns}

        # iterate over the node_ids in each node_column of the dataframe row
        node_buffer = []
        for node_type in node_columns:
            node_id = row[node_type]

            # get a reference to the node and assign any specified properties
            node = M.nodes[node_id]
            for k in node_property_columns:
                # if values are not identical, append with a pipe delimiter
                if k not in node:
                    node[k] = row[k]
                elif isinstance(node[k], str) and str(row[k]) not in node[k]:
                    node[k] += "|{}".format(str(row[k]))
                elif str(row[k]) not in str(node[k]):
                    node[k] = str(node[k]) + "|{}".format(str(row[k]))

            # build edges using precomputed edge properties
            for other_node_id, other_node_type in node_buffer:
                # sort node_type so undirected edges all share the same type
                ordered_name = "_".join(sorted([node_type, other_node_type]))
                edge_properties[edge_type_key] = ordered_name
                M.add_edge(node_id, other_node_id, **edge_properties)

            # store the node from this column in the buffer for future edges
            node_buffer.append((node_id, node_type))

    if collapse_edges:
        # convert the MultiGraph to a Graph
        G = Graph(M)
        k = edge_agg_key
        # Graph(M) already copied the attributes of one parallel edge onto
        # each collapsed edge; drop a copied aggregate value so that edge is
        # not counted twice by the summation below.
        for _, _, collapsed in G.edges(data=True):
            collapsed.pop(k, None)
        # preserve the edge count as a sum of the weight values
        for u, v, data in M.edges(data=True):
            w = data[k] if k in data else 1.0
            edge = G[u][v]
            edge[k] = (w + edge[k]) if k in edge else w
        return G

    return M
예제 #13
0
def backtracking_1(multigraph: nx.MultiGraph,
                   x_edges,
                   y_edges,
                   vertex,
                   timeout: tuple = None,
                   global_timeout: tuple = None) -> bool:
    """
    Recursive backtracking step that grows two edge selections, "z" and "w".

    At ``vertex`` one not-yet-fixed incident edge is tentatively assigned to
    z and all remaining not-yet-fixed incident edges are assigned to w; the
    search then recurses from the far end of the z edge. The search state
    lives in the graph itself: ``multigraph.graph['length_z']``,
    ``multigraph.graph['length_w']`` and ``multigraph.graph['w']``, the edge
    attributes 'fixed_z' / 'fixed_w' and the node attribute 'included_in_z'.

    Args:
        multigraph: Graph being searched; it is mutated during the search.
            On a failed branch ``step_back`` is called (presumably to undo
            the changes of that branch - confirm against ``step_back``).
        x_edges: Edge collection the result must differ from. Its length is
            the number of edges both z and w must reach. It is compared with
            a set of sorted (u, v) tuples, so it is presumably a set of that
            form - TODO confirm.
        y_edges: Second edge collection the result must differ from.
        vertex: Node whose incident edges are explored in this call.
        timeout: Not read here; only forwarded to the recursive call.
        global_timeout: Not read here; only forwarded to the recursive call.

    Returns:
        True if z and w both reached ``len(x_edges)`` edges, both differ
        from ``x_edges`` and ``y_edges``, and both pass
        ``utils.is_hamiltonian_cycle``; False if no branch from ``vertex``
        leads to such a state.

    """

    # Base case: both selections have the required number of edges.
    if multigraph.graph['length_z'] == len(
            x_edges) and multigraph.graph['length_w'] == len(x_edges):
        # Edges carrying a 'fixed_z' attribute, normalised to sorted (u, v) pairs.
        z = nx.get_edge_attributes(multigraph, 'fixed_z')
        z_edges = set(tuple(sorted(item[:2])) for item in z)
        if z_edges != x_edges and z_edges != y_edges and multigraph.graph['w'] != x_edges and multigraph.graph[
            'w'] != y_edges and \
                utils.is_hamiltonian_cycle(z_edges) and utils.is_hamiltonian_cycle(multigraph.graph['w']):
            return True
        return False

    # Try each not-yet-fixed edge at this vertex as the next z edge.
    for u, v, key, attrs in filter(
            utils.is_non_fixed_edge,
            multigraph.edges(vertex, data=True, keys=True)):
        # Skip edges leading to a node already in z, unless this edge would
        # be the last z edge.
        if 'included_in_z' in multigraph.nodes[
                v if u == vertex else
                u] and multigraph.graph['length_z'] + 1 != len(x_edges):
            continue

        multigraph.edges[u, v, key]['fixed_z'] = True
        multigraph.nodes[v if u == vertex else u]['included_in_z'] = True
        multigraph.graph['length_z'] += 1

        # Remembered so that step_back receives exactly the edges added below.
        added_to_w = []

        # Every edge at this vertex that is still not fixed goes to w.
        for u_w, v_w, key_w, _ in filter(
                utils.is_non_fixed_edge,
                multigraph.edges(vertex, data=True, keys=True)):
            multigraph.edges[u_w, v_w, key_w]['fixed_w'] = True
            added_to_w.append((u_w, v_w, key_w))
            multigraph.graph['length_w'] += 1
            multigraph.graph['w'].add(tuple(sorted((u_w, v_w))))

        # Prune: w contains a cycle but is not a full-length Hamiltonian cycle.
        if added_to_w and utils.has_cycle(multigraph.graph['w']) and (
                multigraph.graph['length_w'] != len(x_edges)
                or not utils.is_hamiltonian_cycle(multigraph.graph['w'])):
            step_back(multigraph, u, v, key, vertex, added_to_w)
            continue

        # NOTE(review): recursion continues from v, while the checks above use
        # "v if u == vertex else u" - assumes edges(vertex) always reports the
        # queried vertex first; confirm.
        if backtracking_1(multigraph,
                          x_edges,
                          y_edges,
                          v,
                          timeout=timeout,
                          global_timeout=global_timeout):
            return True

        step_back(multigraph, u, v, key, vertex, added_to_w)

    return False
예제 #14
0
def get_route(graph: nx.MultiGraph,
              source_id,
              target_id,
              amount: int,
              max_hops: int = 20):
    """
    Search for a route that delivers 'amount' from 'source_id' to 'target_id'.

    The search is a 'backwards-Dijkstra': it starts at the target node and
    relaxes channels towards their senders until the source node is popped
    from the priority structure.

    Note that the node attributes 'amount_node_needs', 'weight' and
    'path_to_target' are overwritten on every node of the given graph.

    :param graph: The graph describing the network.
    :param source_id: The source node.
    :param target_id: The target node.
    :param amount: Amount (in milli-satoshis) to transfer.
                   Note that this is the amount of money that should reach the target node eventually,
                   and more money will be added in order to pay the fees on the route.
    :param max_hops: Bound on the route length; a node is not queued when the path
                     of the receiver it was reached through already holds
                     'max_hops - 1' edges or more.

    :return: The route as a list of edges ordered from the source to the target,
             each one a tuple (sender id, receiver id, channel id),
             or None if the source was never reached.
    """
    # Every node starts out 'infinitely far' from the target.
    for attribute in ('amount_node_needs', 'weight'):
        nx.set_node_attributes(graph, np.inf, attribute)

    # Each node needs its own list object, which is why this is not done
    # through set_node_attributes (one list would be shared by all nodes).
    for node_id in graph.nodes:
        graph.nodes[node_id]['path_to_target'] = []

    pending = UpdatablePrioritySet()

    # The target needs exactly 'amount', and its path to itself weighs nothing.
    target_data = graph.nodes[target_id]
    target_data['amount_node_needs'] = amount
    target_data['weight'] = 0
    pending.update(target_id, None, 0)

    while not pending.is_empty():
        # The popped node receives money it has to pass on towards the target.
        receiver_id = pending.pop()

        # Once the source itself is popped, its stored path is the result.
        if receiver_id == source_id:
            return graph.nodes[source_id]['path_to_target']

        receiver = graph.nodes[receiver_id]

        # Try to improve every neighbor that could send money to this receiver.
        for _, _, channel in graph.edges(receiver_id, data=True):
            policy, sender_id = utils.common.get_sender_policy_and_id(
                receiver_id, channel)
            hop = (sender_id, receiver_id, channel['channel_id'])

            # Weight of the path sender -> receiver -> ... -> target, and the
            # amount the sender must hold to push the payment along that path.
            needed_by_sender, candidate_weight = lnd_weight(
                policy,
                amount=receiver['amount_node_needs'],
                prev_weight=receiver['weight'])

            sender = graph.nodes[sender_id]

            # Keep the sender's current path unless the new one is strictly lighter.
            if not candidate_weight < sender['weight']:
                continue

            # Queue the sender only if the path through this receiver is short enough.
            # NOTE(review): the sender's attributes below are updated even when it
            # is not queued - confirm this is intended.
            if len(receiver['path_to_target']) < max_hops - 1:
                pending.update(sender_id,
                               sender['weight'],
                               new_priority=candidate_weight)

            # Record the improved path on the sender for later iterations.
            sender['weight'] = candidate_weight
            sender['amount_node_needs'] = needed_by_sender
            sender['path_to_target'] = [hop] + receiver['path_to_target']

    return None  # No route was found transferring 'amount' from 'source' to 'target'.