def simplify(streckennetz: nx.MultiGraph) -> nx.MultiGraph:
    """Thin out parallel edges and prune dead-end nodes of a network, in place.

    Two passes are made over ``streckennetz``:

    1. For every pair of nodes joined by more than one edge, the edge with the
       smallest ``'length'`` attribute is removed (one edge per node pair per
       call). On ties the first such edge in key iteration order is removed.
    2. Nodes whose degree is below 2 are removed repeatedly until none remain.
       Nodes that carry a ``'type'`` attribute are never removed.

    :param streckennetz: Graph to simplify. Every edge that has a parallel
        sibling must carry a ``'length'`` attribute. Modified in place.
    :return: The same graph object that was passed in.
    :raises KeyError: If a parallel edge has no ``'length'`` attribute.
    """
    print('simplifying...')

    # Remove the shorter edge of two parallel edges
    # NOTE(review): this drops the *shortest* parallel edge, as the original
    # comment states - confirm that keeping the longer one is really intended.
    # A set is used because edges() reports a node pair once per parallel edge,
    # which would otherwise queue the same (u, v, key) several times.
    edges_to_remove = set()
    for u, v in streckennetz.edges():
        parallel = streckennetz[u][v]
        if len(parallel) > 1:
            # Edge keys are not guaranteed to start at 0 (e.g. after earlier
            # removals), so pick the minimum over the keys that actually exist.
            shortest_key, _ = min(
                parallel.items(), key=lambda item: item[1]['length'])
            edges_to_remove.add((u, v, shortest_key))
    print('found', len(edges_to_remove), 'parallel edges')
    streckennetz.remove_edges_from(edges_to_remove)

    # Remove nodes, that only have a single edge
    # Removing a dead end can expose a new one, hence the fixed-point loop.
    while True:
        nodes_to_remove = [
            node for node in streckennetz.nodes()
            if streckennetz.degree(node) < 2
            and 'type' not in streckennetz.nodes[node]
        ]
        if not nodes_to_remove:
            break
        print('found', len(nodes_to_remove), 'deadend nodes')
        streckennetz.remove_nodes_from(nodes_to_remove)
    return streckennetz
def _get_fbvs_max_size(self, g: MultiGraph, k: int) -> set:
    """Search for a feedback vertex set of ``g`` using a budget of ``k`` vertices.

    The reductions are applied first; then a random endpoint of a random edge
    is put into the solution and the search recurses on the graph without that
    vertex and with the budget lowered by one.

    :param g: Graph to solve. Passed to ``self.apply_reductions`` first
        (presumably reduced in place - confirm against that method).
    :param k: Remaining number of vertices that may be picked.
    :return: The union of the vertices returned by the reductions, the picked
        vertex and the recursive solution, or ``None`` when the budget is
        exhausted (``k < 0``) on this branch.
    """
    # Exhaustively apply reductions
    k, x0 = self.apply_reductions(g, k)

    # Originally reduction 5: if k < 0, terminate the algorithm and conclude that
    # (G, k) is a no-instance.
    if k < 0:
        return None

    # If G is an empty graph, then we return soln_redux
    if len(g) == 0:
        return x0

    # Pick a random edge, then a random end node of that edge.
    # The edges are materialised into a list because choice() indexes its
    # argument by integer position, which networkx edge views do not support.
    rand_edge = choice(list(g.edges()))
    v = choice(rand_edge)

    # We recurse on (G - v, k - 1).
    xn = self._get_fbvs_max_size(graph_minus(g, {v}), k - 1)

    if xn is None:
        # If the recursive step returns a failure, then we return a failure as well.
        return None

    # If the recursive step returns a feedback vertex set Xn, then we return
    # X = Xn ∪ {v} ∪ X0.
    return xn.union({v}).union(x0)
def test_relabel_nodes_multigraph():
    """In-place relabelling of a MultiGraph keeps both parallel edges.

    Regression test: failed after switching to dg.relabel_nodes.
    """
    renames = {'a': 'aardvark', 'b': 'bear'}
    parallel = MultiGraph([('a', 'b'), ('a', 'b')])

    relabelled = relabel_nodes(parallel, renames, copy=False)

    expected_nodes = ['aardvark', 'bear']
    expected_edges = [('aardvark', 'bear')] * 2
    assert sorted(relabelled.nodes()) == expected_nodes
    assert_edges_equal(sorted(relabelled.edges()), expected_edges)
def graph_minus(g: MultiGraph, w: set) -> MultiGraph:
    """Return a new MultiGraph holding ``g`` without the nodes in ``w``.

    Only the structure is copied: every edge whose two endpoints both lie
    outside ``w`` is re-added (parallel edges stay parallel), followed by every
    remaining node so that isolated nodes survive. Edge keys, edge attributes
    and node attributes are not carried over. ``g`` itself is left untouched.

    :param g: Source graph.
    :param w: Nodes to leave out.
    :return: The freshly built graph.
    """
    remainder = MultiGraph()
    # Edges first, then nodes - the same insertion order as a plain loop.
    remainder.add_edges_from(
        (a, b) for (a, b) in g.edges() if a not in w and b not in w
    )
    remainder.add_nodes_from(node for node in g.nodes() if node not in w)
    return remainder
def generate_snapshots_over_time(G: nx.MultiGraph, minutes=0, hours=0, days=0, max_snapshots=None, interval=None,
                                 include_final=False, cummulative=True):
    """
    Build reduced (weighted, non-multi) snapshots of G at regular time steps.

    Each snapshot is built from exactly the edges selected for its timestamp and
    then passed through reduce_multi_graph.

    :param G: MultiGraph you want to watch over time, with "created_at" edge attribute
    :param minutes: Number of minutes between two snapshots (Default 0)
    :param hours: Number of hours between two snapshots (Default 0)
    :param days: Number of days between two snapshots (Default 0)
    :param max_snapshots: Maximum number of generated snapshots (Default None - all snapshots are created)
    :param interval: Tuple (start, end) specifying in which interval to generate snapshots
                     (Default None - takes min, max created_at date of G)
    :param include_final: If True, set G as value for max_date (Default False)
    :param cummulative: If True (default), a snapshot holds every edge created up to and including its
                        timestamp. If False, it holds only the edges created within
                        [timestamp, timestamp + step], both ends inclusive
    :return: A dict mapping each snapshot timestamp to the graph returned by reduce_multi_graph
    :raises ValueError: If no edge has a "created_at" attribute, or if the step is negative or zero
    """
    if nx.get_edge_attributes(G, "created_at") == {}:
        raise ValueError("Graph needs 'created_at' edge attribute")
    if minutes < 0 or hours < 0 or days < 0 or minutes + hours + days <= 0:
        raise ValueError("Illegal minutes, hours or days values")

    step = timedelta(minutes=minutes, hours=hours, days=days)

    # Identify every edge unambiguously (including its key on multigraphs) and
    # parse its creation time once instead of once per snapshot.
    if G.is_multigraph():
        timed_edges = [((u, v, key), str_to_datetime(attr["created_at"]))
                       for (u, v, key, attr) in G.edges(keys=True, data=True)]
    else:
        timed_edges = [((u, v), str_to_datetime(attr["created_at"]))
                       for (u, v, attr) in G.edges(data=True)]

    if interval is None:
        # Compare parsed timestamps rather than the raw attribute values, so the
        # bounds do not depend on how "created_at" happens to be formatted.
        created_ats = [created for (_, created) in timed_edges]
        date = min(created_ats)
        max_date = max(created_ats)
    else:
        date = str_to_datetime(interval[0])
        max_date = str_to_datetime(interval[1])

    snapshots = {}
    while date <= max_date and (max_snapshots is None or len(snapshots) < max_snapshots):
        if cummulative:
            selected = [edge for (edge, created) in timed_edges if created <= date]
        else:
            window_end = date + step
            selected = [edge for (edge, created) in timed_edges if date <= created <= window_end]

        # An edge-induced subgraph contains only the selected edges. A node-induced
        # subgraph would also pull in edges outside the time window whenever both
        # of their endpoints happen to be part of the snapshot.
        snapshots[date] = reduce_multi_graph(G.edge_subgraph(selected))
        date += step

    if include_final:
        snapshots[max_date] = reduce_multi_graph(G)
    return snapshots
def reduce_multi_graph(graph: nx.MultiGraph, thresh=0):
    """
    Collapse parallel edges into single weighted edges.

    Node pairs are treated as unordered (each pair is sorted before counting), so the nodes
    must be mutually orderable. Nodes without any edge do not appear in the result.

    :param graph: Graph whose edges are counted per node pair
    :param thresh: Minimum number of edges a node pair needs in order to be kept (Default 0)
    :return: A non-multi graph from a multi graph, where the number of edges between two nodes is
             preserved in an edge attribute "weight"
    """
    pair_counts = Counter(tuple(sorted(pair)) for pair in graph.edges())
    rows = [(first, second, count) for (first, second), count in pair_counts.items()]

    weighted = pd.DataFrame(rows, columns=["user1", "user2", "weight"])
    frequent = weighted.loc[weighted["weight"] >= thresh]

    return nx.from_pandas_edgelist(frequent, source="user1", target="user2", edge_attr=True)
def get_edge_attr_feature_matrix(self, G:nx.MultiGraph, doc, embd_dim=96, embedding_type=None, as_torch=True, is_padding_pos=True, **kwargs):
    """Build an edge feature matrix with intended shape [num_edges, edge_feat_dim].

    Every edge attribute value of ``G`` is looked up as a token text in ``doc``
    and that token's vector becomes a feature row.

    :param G: Graph whose edge attribute values are token texts found in ``doc``.
    :param doc: Iterable of tokens exposing ``.text`` and ``.vector``
        (presumably a spaCy ``Doc`` - confirm against the caller).
    :param embd_dim: Width each token vector is reshaped to.
    :param embedding_type: Not read anywhere in this method.
    :param as_torch: If True, return a float ``torch`` tensor instead of a numpy array.
    :param is_padding_pos: If True, append three zero columns to every row.
    :param kwargs: Only ``is_cuda`` is read; when truthy (and ``as_torch`` is set)
        the tensor is moved to CUDA if available, otherwise to CPU.
    :return: The stacked feature rows.
    :raises KeyError: If an edge attribute value is not the text of a token in ``doc``.
    :raises IndexError: If no feature row was produced (``feat_mats`` is empty).
    """
    assert G is not None
    # EV (the edge view without data) is not used further down in this method.
    EDV, EV = G.edges(data=True), G.edges(data=False)
    E = len(EDV)
    # Expected feature width: embd_dim plus the 3 padding columns, if requested.
    M = (embd_dim+3) if is_padding_pos else embd_dim
    token2vec = {}
    # RS N.b.: Gt side token 'pos' are captured in node attr, duplicates are overridden
    # Tokens sharing the same text overwrite each other; the last vector wins.
    for token in doc:
        token2vec[token.text] = token.vector
    feat_mats = []
    for i, (_, _, feat_dict) in enumerate(EDV):
        # NOTE(review): one row is appended per edge *attribute*, so the (E, M)
        # check below only holds when each edge has exactly one attribute.
        # The loop nesting was reconstructed from unindented source - confirm.
        for key, value in feat_dict.items():
            v_embd = token2vec[value]
            v_embd = v_embd.reshape((-1, embd_dim))
            #logger.debug(f"value = {value}\n v_embd = {v_embd}")
            if is_padding_pos:
                v_embd = np.pad(v_embd, ((0,0), (0, 3)), 'constant', constant_values=(0, 0.0)) # [1, embd_dim+3]
            feat_mats.append(v_embd)
    #unique_values = set(reduce(lambda x1, x2: x1 + x2, data.values))
    if len(feat_mats) > 1:
        # Stack the rows pairwise, top to bottom.
        feat_mats = reduce(lambda a, b: np.vstack((a, b)), feat_mats)
    else:
        # Single row: force a 2-D [1, width] shape. An empty list fails here.
        feat_mats = feat_mats[0].reshape((1, -1))
    if as_torch:
        feat_mats = torch.from_numpy(feat_mats).float()
        if kwargs.get('is_cuda'):
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            feat_mats = feat_mats.to(device)
    try:
        # Ensure single edges are properly reshaped
        # A shape mismatch is only logged at debug level; the matrix is returned anyway.
        assert feat_mats.shape == (E, M)
    except AssertionError as ae:
        ## RuntimeError: Edge indices and edge attributes hold a differing number of edges,
        logger.debug(f"{ae}\n feat_mats.shape = {feat_mats.shape}")
    return feat_mats
def subgraph_by_timestamp(mg: nx.MultiGraph, start: int, end: int) -> nx.Graph:
    """Collapse the edges of ``mg`` that fall into a time window into a simple graph.

    Every node of ``mg`` is kept (without its attributes), even if none of its
    edges fall into the window. Edges whose ``"timestamp"`` lies in the
    half-open range ``[start, end)`` are merged per node pair and their
    ``"weight"`` values are summed.

    :param mg: Source graph; edges need ``"timestamp"`` and ``"weight"`` attributes.
    :param start: Inclusive lower bound of the window.
    :param end: Exclusive upper bound of the window.
    :return: A new ``nx.Graph`` with one weighted edge per connected node pair.
    """
    g = nx.Graph()
    g.add_nodes_from(mg.nodes())

    for u, v, data in mg.edges(data=True):
        if not (start <= data["timestamp"] < end):
            continue
        if g.has_edge(u, v):
            g[u][v]["weight"] += data["weight"]
        else:
            g.add_edge(u, v, weight=data["weight"])
    return g
def get_channel_with_minimal_fee_base(subgraph: nx.MultiGraph, source, target):
    """
    Get a sub-graph containing exactly two nodes - one is the source and the other is the destination.
    The function calculates the channel with the minimal base-fee and returns it.

    The checks are raised explicitly (rather than written as ``assert`` statements) so that they
    are still enforced when Python runs with ``-O``.

    :param subgraph: The MultiGraph containing the nodes src and dest.
    :param source: The source node.
    :param target: The target node.
    :return: The channel with the minimal base-fee, as a tuple
             (source public key, target public key, channel id).
    :raises AssertionError: If the graph does not consist of exactly the two given nodes, if a channel
                            does not list 'source' as one of its ends, or if no channel could be chosen.
    """
    if set(subgraph.nodes) != {source, target}:
        raise AssertionError("BAD USAGE - you should give a graph containing only the two given nodes")

    min_fee: float = float('inf')
    min_fee_channel_id = None

    for _, _, channel_data in subgraph.edges(data=True):
        # Work out on which side of the channel record the source sits.
        if source == channel_data['node1_pub']:
            source_i, target_i = 1, 2
        elif source == channel_data['node2_pub']:
            source_i, target_i = 2, 1
        else:
            raise AssertionError("WTF? Neither 'source' nor 'target' are in the channel.")

        # When money is transferred from the source node to the target node, the fee is paid for the target node
        # (i.e. that target node keeps some of the money to himself, and passes the rest forwards).
        channel_fee: float = channel_data[f'node{target_i}_policy']['fee_base_msat']

        # Strict comparison: on equal fees the channel seen first is kept.
        if channel_fee < min_fee:
            min_fee = channel_fee
            min_fee_channel_id = (channel_data[f'node{source_i}_pub'],
                                  channel_data[f'node{target_i}_pub'],
                                  channel_data['channel_id'])

    if min_fee_channel_id is None or min_fee == float('inf'):
        raise AssertionError("ERROR: no channel was chosen.")

    return min_fee_channel_id
def get_most_important_journal(graph:nwx.MultiGraph):
    '''
    Get the journal that touches the most distinct nodes of a graph.

    For every edge carrying a 'journal' attribute, both endpoints are credited
    to that journal. A journal's score is the number of distinct endpoints
    other than the empty string. If several journals share the top score, only
    the one encountered first is returned.

    @parameter : graph
    @return : tuple (journal, number of distinct non-empty endpoints)
    '''
    # journal -> set of endpoints; dict order keeps the first-seen journal first,
    # which is what decides ties in max() below.
    endpoints_by_journal = {}
    for source, target, attrs in graph.edges(data=True):
        if 'journal' not in attrs:
            continue
        endpoints_by_journal.setdefault(attrs['journal'], set()).update((source, target))

    count_journal = {
        journal: sum(1 for node in endpoints if node != "")
        for journal, endpoints in endpoints_by_journal.items()
    }

    return max(count_journal.items(), key=operator.itemgetter(1))
def graph_from_dataframe(
    dataframe,
    threshold_by_percent_unique=0.1,
    threshold_by_count_unique=None,
    node_id_columns=[],
    node_property_columns=[],
    edge_property_columns=[],
    node_type_key="type",
    edge_type_key="type",
    collapse_edges=True,
    edge_agg_key="weight",
):
    """
    Build an undirected graph from a pandas dataframe.

    This function attempts to infer which cells should become nodes
    based on either:

        a. what percentage of the column are unique values (defaults to 10%)
        b. an explicit count of unique values (i.e. any column with 7 unique
           values or less)
        c. an explicit list of column keys (i.e.
           ['employee_id', 'location_code'])

    Column headers are preserved as node and edge 'types'. By default, this is
    stored using the key 'type' which is used by some graph import processes
    but can be reconfigured.

    This function uses a MultiGraph structure during the build phase so that it
    is possible to make multiple connections between nodes. By default, at the
    end of the build phase, the MultiGraph is converted to a Graph and the
    count of edges between each node-pair is written as a 'weight' property.

    An empty dataframe (zero rows) yields an empty graph.

    :param pandas.DataFrame dataframe: A pandas dataframe containing the data
        to be converted into a graph.
    :param float threshold_by_percent_unique: A percent value used to determine
        whether a column should be used to generate nodes based on its
        cardinality (i.e. in a dataframe with 100 rows, treat any column with
        10 or less unique values as a node)
    :param int threshold_by_count_unique: A numeric value used to determine
        whether a column should be used to generate nodes based on its
        cardinality (i.e. if 7 is supplied, treat any column with 7 or less
        unique values as a node) - supplying a value will take priority over
        percent_unique
    :param list node_id_columns: A list of column headers to use for generating
        nodes. Supplying any value will take precedence over
        threshold_by_percent_unique or threshold_by_count_unique. Note: this
        can cause the size of the graph to expand significantly since every
        unique value in a column will become a node.
    :param list node_property_columns: A list of column headers to use for
        generating properties of nodes. These can include the same column
        headers used for the node id.
    :param list edge_property_columns: A list of column headers to use for
        generating properties of edges.
    :param str node_type_key: A string that sets the key that will be used to
        preserve the column name as a node property (this is useful for
        importing networkx graphs to databases that distinguish between node
        'types' or for visually encoding those types in plots).
    :param str edge_type_key: A string that sets the key that will be used to
        keep track of edge relationships as 'types' (this is useful for
        importing networkx graphs to databases that distinguish between edge
        'types' or for visually encoding those types in plots). Edge type
        values are automatically set to the two column names of the connected
        nodes, sorted and joined with an underscore.
    :param bool collapse_edges: Graphs are instantiated as a 'MultiGraph'
        (allow multiple edges between nodes) and then collapsed into a 'Graph'
        which only has a single edge between any two nodes. Information is
        preserved by aggregating the count of those edges as a 'weight' value.
        Set this value to False to return the MultiGraph. Note: this can cause
        the size of the graph to expand significantly since each row can
        potentially have n! edges where n is the number of columns in the
        dataframe.
    :param str edge_agg_key: A string that sets the key the edge count will be
        assigned to when edges are aggregated.
    :returns: A networkx Graph (or MultiGraph if collapse_edges is set to
        False).
    """
    # The list defaults above are only ever read, never mutated, so sharing
    # them between calls is harmless.
    assert isinstance(
        dataframe, pd.DataFrame), "{} is not a pandas DataFrame".format(dataframe)

    M = MultiGraph()

    # if explicit specification of node_id_columns is provided, use those
    if len(node_id_columns) > 0:
        node_columns = node_id_columns
    else:
        # otherwise, compute with thresholds based on the dataframe
        if threshold_by_count_unique:
            node_columns = sorted([
                col for col in dataframe.columns
                if dataframe[col].nunique() <= threshold_by_count_unique
            ])
        else:
            row_count = dataframe.shape[0]

            def unique_ratio(col):
                # A frame without rows has no unique values at all; treating
                # the ratio as 0 avoids dividing by zero and mirrors what the
                # count-based branch does for an empty frame.
                if row_count == 0:
                    return 0.0
                return dataframe[col].nunique() / row_count

            node_columns = sorted([
                col for col in dataframe.columns
                if unique_ratio(col) <= threshold_by_percent_unique
            ])

    # use the unique values for each node column as node types
    for node_type in node_columns:
        M.add_nodes_from([(node, {
            node_type_key: node_type
        }) for node in dataframe[node_type].unique()])

    # iterate over the rows and generate an edge for each pair of node columns
    for _, row in dataframe.iterrows():
        # assemble the edge properties as a dictionary
        edge_properties = {k: row[k] for k in edge_property_columns}

        # iterate over the node_ids in each node_column of the dataframe row
        node_buffer = []
        for node_type in node_columns:
            node_id = row[node_type]

            # get a reference to the node and assign any specified properties
            node = M.nodes[node_id]
            for k in node_property_columns:
                # if values are not identical, append with a pipe delimiter
                if k not in node:
                    node[k] = row[k]
                elif isinstance(node[k], str) and str(row[k]) not in node[k]:
                    node[k] += "|{}".format(str(row[k]))
                elif str(row[k]) not in str(node[k]):
                    node[k] = str(node[k]) + "|{}".format(str(row[k]))

            # build edges using precomputed edge properties
            for other_node_id, other_node_type in node_buffer:
                # sort node_type so undirected edges all share the same type
                ordered_name = "_".join(sorted([node_type, other_node_type]))
                edge_properties[edge_type_key] = ordered_name
                M.add_edge(node_id, other_node_id, **edge_properties)

            # store the node from this column in the buffer for future edges
            node_buffer.append((node_id, node_type))

    if collapse_edges:
        # convert the MultiGraph to a Graph
        G = Graph(M)
        k = edge_agg_key
        # preserve the edge count as a sum of the weight values
        for u, v, data in M.edges(data=True):
            # an edge without its own value under k counts as 1.0
            w = data[k] if k in data else 1.0
            edge = G[u][v]
            edge[k] = (w + edge[k]) if k in edge else w
        return G

    return M
def backtracking_1(multigraph: nx.MultiGraph, x_edges, y_edges, vertex, timeout: tuple = None,
                   global_timeout: tuple = None) -> bool:
    """Backtracking search that grows two edge sets, "z" and "w", from ``vertex``.

    At ``vertex`` one not-yet-fixed incident edge is marked ``fixed_z`` and all
    remaining not-yet-fixed incident edges are marked ``fixed_w``; the search
    then recurses. The bookkeeping lives on the graph itself:
    ``multigraph.graph['length_z']``, ``multigraph.graph['length_w']`` and the
    set ``multigraph.graph['w']`` of sorted endpoint pairs.

    Args:
        multigraph: Graph being searched; mutated during the search. Choices
            are reverted through ``step_back`` when a branch fails
            (presumably restoring the state set here - confirm in ``step_back``).
        x_edges: Collection of edges; its length is the target size for both
            "z" and "w", and a solution must differ from it.
        y_edges: Second collection of edges a solution must differ from.
        vertex: Vertex whose incident edges are assigned in this call.
        timeout: Not read here; only forwarded to the recursive call.
        global_timeout: Not read here; only forwarded to the recursive call.

    Returns:
        True if a pair (z, w) was completed such that both differ from
        ``x_edges`` and ``y_edges`` and both pass
        ``utils.is_hamiltonian_cycle``; otherwise False.
    """
    # Terminal case: both edge sets have reached the size of x_edges.
    if multigraph.graph['length_z'] == len(
            x_edges) and multigraph.graph['length_w'] == len(x_edges):
        # NOTE(review): this collects every edge that *has* a 'fixed_z'
        # attribute, whatever its value - confirm step_back removes the
        # attribute rather than setting it to False.
        z = nx.get_edge_attributes(multigraph, 'fixed_z')
        # Drop the edge key and normalise the endpoint order.
        z_edges = set(tuple(sorted(item[:2])) for item in z)
        if z_edges != x_edges and z_edges != y_edges and multigraph.graph['w'] != x_edges and multigraph.graph[
            'w'] != y_edges and \
                utils.is_hamiltonian_cycle(z_edges) and utils.is_hamiltonian_cycle(multigraph.graph['w']):
            return True
        return False
    # Try every not-yet-fixed edge at `vertex` as the next "z" edge.
    for u, v, key, attrs in filter(
            utils.is_non_fixed_edge,
            multigraph.edges(vertex, data=True, keys=True)):
        # Skip an edge leading to a node already marked as part of "z",
        # unless this edge would be the last one needed for "z".
        if 'included_in_z' in multigraph.nodes[
                v if u == vertex else u] and multigraph.graph['length_z'] + 1 != len(x_edges):
            continue
        multigraph.edges[u, v, key]['fixed_z'] = True
        multigraph.nodes[v if u == vertex else u]['included_in_z'] = True
        multigraph.graph['length_z'] += 1
        added_to_w = []
        # Every edge at `vertex` that is still unfixed is assigned to "w".
        for u_w, v_w, key_w, _ in filter(
                utils.is_non_fixed_edge,
                multigraph.edges(vertex, data=True, keys=True)):
            multigraph.edges[u_w, v_w, key_w]['fixed_w'] = True
            added_to_w.append((u_w, v_w, key_w))
            multigraph.graph['length_w'] += 1
            multigraph.graph['w'].add(tuple(sorted((u_w, v_w))))
        # Prune: a cycle in "w" is only acceptable if "w" has full size and
        # passes the Hamiltonian-cycle check.
        if added_to_w and utils.has_cycle(multigraph.graph['w']) and (
                multigraph.graph['length_w'] != len(x_edges) or not utils.is_hamiltonian_cycle(multigraph.graph['w'])):
            step_back(multigraph, u, v, key, vertex, added_to_w)
            continue
        # Continue the search from `v`.
        # NOTE(review): this passes `v` even when u != vertex - confirm the edge
        # view always reports `vertex` first.
        if backtracking_1(multigraph, x_edges, y_edges, v, timeout=timeout, global_timeout=global_timeout):
            return True
        # The branch failed: undo this choice before trying the next edge.
        step_back(multigraph, u, v, key, vertex, added_to_w)
    return False
def get_route(graph: nx.MultiGraph, source_id, target_id, amount: int, max_hops: int = 20):
    """
    Compute a minimal-weight route from the source node to the target node.
    The route is calculated 'backwards-Dijkstra' - starting at the target node and expanding towards
    the source, so that the amount each node has to receive (including fees) is known on the way.

    Side effect: the node attributes 'amount_node_needs', 'weight' and 'path_to_target' are
    overwritten on every node of the graph.

    :param graph: The graph describing the network.
    :param source_id: The source node.
    :param target_id: The target node.
    :param amount: Amount (in milli-satoshis) to transfer.
                   Note that this is the amount of money that should reach the target node eventually,
                   and more money will be added in order to pay the fees on the route.
    :param max_hops: Bound on the route length: a sender is only queued when the path from its receiver
                     to the target has fewer than max_hops - 1 edges.
    :return: The path from the source node to the target node, as a list of edge keys
             (sender_node_id, receiver_node_id, channel_id), or None if no route was found.
    """
    # Initialize the 'amount_node_needs' and 'weight' attributes of each node as INFINITY.
    # These values will be changed during the run of the routing algorithm.
    nx.set_node_attributes(graph, np.inf, 'amount_node_needs')
    nx.set_node_attributes(graph, np.inf, 'weight')

    # Initialize 'path_to_target' for each node in the graph - it's a list containing the path from it to 'target'.
    # Note that we can't use set_node_attributes with mutable types like a list
    # (as this list will be shared among all nodes which is undesirable).
    for node in graph.nodes:
        graph.nodes[node]['path_to_target'] = list()

    unvisited_nodes = UpdatablePrioritySet()

    # Initialize the target attributes with its desired values.
    graph.nodes[target_id]['amount_node_needs'] = amount  # The amount 'target' needs to get is the given 'amount'.
    graph.nodes[target_id]['weight'] = 0  # The weight of the path starting and ending in 'target' is 0.

    # Add the target node to the unvisited_nodes data-structure with weight 0.
    unvisited_nodes.update(target_id, None, 0)

    # Iterate as long as there is some unvisited nodes we need to visit.
    while not unvisited_nodes.is_empty():
        # Extract the node in the unvisited_nodes data-structure
        # that has a path starting from it and ending in 'target' with a minimal weight.
        # This node will be a 'receiver' of money that it needs to transfer further to 'target',
        # that's why it's named 'receiver_node'.
        receiver_node_id = unvisited_nodes.pop()

        # If the receiver node is the source node, we can finish and return the path it has,
        # because we know this path is the path with minimal weight among all paths
        # that start in 'source' and end in 'target'.
        if receiver_node_id == source_id:
            return graph.nodes[source_id]['path_to_target']

        # Go over all the neighbors of this receiver node, and for each one the weight in the heap might need updating.
        receiver_node_edges = graph.edges(receiver_node_id, data=True)
        for _, _, edge_data in receiver_node_edges:
            sender_node_policy, sender_node_id = utils.common.get_sender_policy_and_id(
                receiver_node_id, edge_data)
            edge_key = (sender_node_id, receiver_node_id, edge_data['channel_id'])

            # Calculate the weight of the path starting at 'sender' and ending at 'target',
            # passing first through the current 'receiver' (and continuing to target from there).
            # The amount that 'sender' needs to get in order to perform this multi-hop transfer is also calculated.
            amount_sender_needs, weight = lnd_weight(
                sender_node_policy,
                amount=graph.nodes[receiver_node_id]['amount_node_needs'],
                prev_weight=graph.nodes[receiver_node_id]['weight'])

            sender_node_data = graph.nodes[sender_node_id]
            receiver_node_data = graph.nodes[receiver_node_id]

            # If the weight of the path starting at the neighbor and passing through the receiver_node is lower than
            # the current weight of the path that is saved for the neighbor.
            if weight < sender_node_data['weight']:
                # Update the weight of the path in the heap accordingly, since it's lower.
                # Do not add to the unvisited nodes data-structure the new path if it's too long.
                # Minus 1 because path is a list of edges and not a list of nodes, and #edges = #nodes - 1.
                if len(receiver_node_data['path_to_target']) < max_hops - 1:
                    # The sender's currently stored weight is passed as its old priority.
                    unvisited_nodes.update(sender_node_id, sender_node_data['weight'], new_priority=weight)

                    # Update the attributes of the node in the graph itself, to use in later iterations.
                    # NOTE(review): nesting these updates under the hop-limit check keeps the stored weight
                    # in sync with the priority set; the nesting was reconstructed - confirm.
                    sender_node_data['weight'] = weight
                    sender_node_data['amount_node_needs'] = amount_sender_needs
                    sender_node_data['path_to_target'] = [edge_key] + receiver_node_data['path_to_target']

    return None  # No route was found transferring 'amount' from 'source' to 'target'.