def get_graph(filename, with_root=False):
    """Load a weighted directed graph and its terminal list from a text file.

    Lines are read until one equal to ``EOF`` or the real end of the file.
    Space-separated records are recognised by their first token:

    * ``A ...``    -- integers forming one weighted edge tuple
    * ``T n``      -- a terminal node id
    * ``Root n``   -- the root node id (only used when ``with_root`` is true)
    * ``DD n x y`` -- a node ``n`` stored with ``pos=(x, y)``

    :param filename: path of the file to parse.
    :param with_root: when true, return the root declared in the file and
        leave the graph untouched.  Otherwise a root is chosen heuristically
        and all nodes not reachable from it are removed from the graph.
    :return: ``(DG, terms, root)``.
    """
    DG = nx.DiGraph()
    edges = []
    coordinates = []
    terms = []
    root = None

    # The context manager closes the file; iterating it also terminates at
    # the physical end of file, so a missing 'EOF' marker cannot hang the loop.
    with open(filename, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == 'EOF':
                break
            toks = line.split(' ')
            kind = toks[0]
            if kind == 'A':
                edges.append(tuple(int(x) for x in toks[1:]))
            elif kind == 'T':
                terms.append(int(toks[1]))
            elif kind == 'Root':
                if with_root:
                    root = int(toks[1])
            elif kind == 'DD':
                coordinates.append(tuple(int(x) for x in toks[1:]))

    for coord in coordinates:
        DG.add_node(coord[0], pos=(coord[1], coord[2]))
    terms.sort()
    DG.add_weighted_edges_from(edges)

    if with_root:
        return DG, terms, root

    print_graph(DG)
    # Heuristic root: the last node (in iteration order) whose descendants
    # are all terminals.
    # NOTE(review): this also matches leaf nodes (no descendants at all);
    # confirm this is the intended selection rule.
    terminal_set = set(terms)
    max_node = None
    for node in nx.nodes(DG):
        descs = nx.descendants(DG, node)
        if len(terminal_set & set(descs)) == len(descs):
            max_node = node

    if len(nx.nodes(DG)) == 0:
        # Nothing to prune in an empty graph.
        return DG, terms, max_node

    reachable = set(nx.descendants(DG, max_node)) | {max_node}
    unreachable = set(nx.nodes(DG)) - reachable
    for node in unreachable:
        DG.remove_node(node)
    terms = list(terminal_set & reachable)
    print('terms =', len(terms))
    return DG, terms, max_node
def descendants(self, nbunch):
    """Return the set of nodes reachable from a node or a group of nodes.

    For several input nodes a temporary copy of the graph gets an extra
    source node wired to each of them; the inputs themselves are removed
    from the result.
    """
    self.validate_input_nodes(nbunch)

    # a lone node that is not wrapped in a container
    if not self.acceptable_iterable(nbunch):
        return nx.descendants(self, nbunch)

    # a container holding exactly one node
    if len(nbunch) == 1:
        return nx.descendants(self, nbunch[0])

    # several nodes: add an artificial common source to a scratch copy
    scratch = self.copy()
    super_source = scratch.add_node_unique()
    for member in nbunch:
        scratch.add_edge(super_source, member)
    reachable = nx.descendants(scratch, super_source)
    return reachable - set(nbunch)
def _calculate_scores(self): """Calculate the 'value' of each node in the graph based on how many blocking descendants it has. We use this score for the internal priority queue's ordering, so the quality of this metric is important. The score is stored as a negative number because the internal PriorityQueue picks lowest values first. We could do this in one pass over the graph instead of len(self.graph) passes but this is easy. For large graphs this may hurt performance. This operates on the graph, so it would require a lock if called from outside __init__. :return Dict[str, int]: The score dict, mapping unique IDs to integer scores. Lower scores are higher priority. """ scores = {} for node in self.graph.nodes(): score = -1 * len([ d for d in nx.descendants(self.graph, node) if self._include_in_cost(d) ]) scores[node] = score return scores
def _resolve_update_list(self, changed_properties):
    """
    Returns a list of all plasma modules which are affected by the
    changed properties due to their dependency in the plasma graph.

    Parameters
    ----------
    changed_properties : ~list
        all plasma properties that changed; each one must be a key of
        ``self.outputs_dict``

    Returns
    -------
        : ~list
        all affected modules, in topological order of ``self.graph``.
    """
    affected = set()
    for plasma_property in changed_properties:
        node_name = self.outputs_dict[plasma_property].name
        affected.update(nx.descendants(self.graph, node_name))

    # Build the rank table once: works whether topological_sort returns a
    # list or a generator, and avoids a linear ``.index`` scan per element.
    rank = {node: position for position, node
            in enumerate(nx.topological_sort(self.graph))}
    descendants_ob = sorted(affected, key=rank.__getitem__)

    # The previous format string had no placeholder, so the order was
    # silently dropped from the log message.
    logger.debug("Updating modules in the following order: {0}".format(
        "->".join(descendants_ob)))
    return descendants_ob
def filter_graph(graph):
    """Remove, in place, nodes that are not both reachable from ``start_id``
    and able to reach ``end_id`` (both read from the enclosing scope).

    Only nodes reachable from the start or reaching the end are considered;
    those on exactly one of the two sides are deleted.
    """
    downstream = nx.descendants(graph, start_id) | {start_id}
    upstream = nx.ancestors(graph, end_id) | {end_id}
    # symmetric difference == union minus intersection
    graph.remove_nodes_from(downstream ^ upstream)
def as_dependency_list(self, limit_to=None):
    """Group nodes into batches ordered by their number of ancestors.

    Returns a list of lists of nodes, eg. [[0,1], [2], [4,5,6]].  Every
    node in batch `i` has all of its dependencies in earlier batches, so a
    batch can be run concurrently once the previous ones are complete.

    When `limit_to` is given, only those nodes and their descendants are
    included; an entry missing from the graph raises RuntimeError.
    """
    if limit_to is None:
        graph_nodes = set(self.graph.nodes())
    else:
        graph_nodes = set()
        for requested in limit_to:
            if requested not in self.graph:
                raise RuntimeError("Couldn't find model '{}' -- does it exist or is it diabled?".format(requested))
            graph_nodes.add(requested)
            graph_nodes.update(nx.descendants(self.graph, requested))

    # bucket nodes by ancestor count
    by_depth = {}
    for member in graph_nodes:
        depth = len(nx.ancestors(self.graph, member))
        by_depth.setdefault(depth, []).append(member)

    return [by_depth[depth] for depth in sorted(by_depth)]
def _determine_t_death(tree, target):
    """Interpolate a ``t_death`` value for ``target`` and store it on the node.

    The value lies between the parent's ``t_death`` and that of the most
    distant same-species descendant, weighted by the 'distance' edge
    attribute on either side of ``target``.
    """
    # anchor on the parent side
    parent = tree.predecessors(target)[0]
    start_dist = tree.edge[parent][target]['distance']
    start_time = tree.node[parent]['t_death']

    # (distance, t_death) for every descendant sharing the target's species,
    # ordered by distance so the furthest one ends up last
    distances_times = sorted(
        (
            (
                nx.shortest_path_length(tree, source=target, target=n,
                                        weight='distance'),
                tree.node[n].get('t_death', None),
            )
            for n in nx.descendants(tree, target)
            if tree.node[n]['S'] == tree.node[target]['S']
        ),
        key=lambda pair: pair[0],
    )
    end_dist, end_time = distances_times[-1]

    # linear interpolation proportional to the distance on each side
    fraction = start_dist / (start_dist + end_dist)
    tree.node[target]['t_death'] = start_time + (end_time - start_time) * fraction
def OnClick(self, node_id):
    """Handle a click on a graph node.

    Recolors the graph, records the clicked node, re-registers the popup
    handlers that apply to it and highlights the nodes on its paths:
    ancestors when the node is a target, descendants when it is a source.
    Always returns False.
    """
    self.color_nodes()
    self._current_node_id = node_id
    node_ea = self[node_id]

    # start from a clean slate: no context-menu handler is active
    for handler in (self._remove_target_handler,
                    self._disable_source_handler,
                    self._enable_source_handler):
        handler.unregister()

    def _activate(handler):
        handler.register()
        self._attach_to_popup(handler.get_name())

    def _highlight(addresses):
        # only plain intermediate nodes get the path color
        for ea in addresses:
            if ea in self._targets or ea in self._sources:
                continue
            self._set_node_bg_color(self._node_ids[ea], COLOR_PATH)

    if node_ea in self._targets:
        _activate(self._remove_target_handler)
        _highlight(nx.ancestors(self._lca_graph, node_ea))

    if node_ea in self._sources:
        if node_ea in self._disabled_sources:
            _activate(self._enable_source_handler)
        else:
            _activate(self._disable_source_handler)
        _highlight(nx.descendants(self._lca_graph, node_ea))

    return False
def out_component(G, source):
    '''Find the set of nodes (including source) reachable from source.

    Rather than following the pseudocode in figure 6.15 of
    Kiss, Miller & Simon, this uses the built-in networkx ``descendants``.

    Parameters
    ----------
    G : NetworkX Graph
        The network the disease will transmit through.
    source : either a node or an iterable of nodes (set, list, tuple)
        The nodes from which the infections start.  A value that is itself
        a node of ``G`` is always treated as a single node, even if it is
        iterable (e.g. a string or tuple node label).

    Returns
    -------
    reachable_nodes : set
        the set of nodes reachable from source (including source).

    Raises
    ------
    networkx.NetworkXError
        if ``source`` is neither a node of ``G`` nor an iterable.
    '''
    try:
        is_single_node = G.has_node(source)
    except TypeError:
        # unhashable containers (list, set) can never be nodes themselves
        is_single_node = False

    if is_single_node:
        source_nodes = {source}
    else:
        try:
            source_nodes = set(source)
        except TypeError:
            raise nx.NetworkXError(
                "source must be a node of G or an iterable of nodes")

    # nx.descendants excludes its start node, so seed the result with the
    # sources to honour the documented "including source" contract.
    reachable_nodes = set(source_nodes)
    for node in source_nodes:
        reachable_nodes.update(nx.descendants(G, node))
    return reachable_nodes
def reset(self, cell):
    """Clear ``cell`` and the dependants computed from it.

    Nothing happens when the cell already has no value.  Otherwise its
    value and the value of each descendant in ``self.graph`` that is a
    CellRange or has a formula are set to None.
    """
    if cell.value is not None:
        cell.value = None
        for dependent in descendants(self.graph, cell):
            if isinstance(dependent, CellRange) or dependent.formula:
                dependent.value = None
def print_impacting_modules(single_node=None, json_out=None):
    """
    For each module, list every module it depends on, i.e. the modules
    whose change can potentially impact it.  All levels of dependency are
    shown, not just the immediately imported modules.

    If `single_node` is given, only that module is reported.  If the
    json_out argument is not None, the output is recorded under its
    'impacting_modules' key instead of being printed on stdout.
    :return:
    """
    to_stdout = json_out is None
    if to_stdout:
        print('\n===Impacting Modules===')
    else:
        json_out['impacting_modules'] = {}

    for node_name in G.nodes_iter():
        if single_node and (node_name != single_node):
            continue
        impacting = nx.descendants(G, node_name)
        if to_stdout:
            print(augment_format_string(node_name, '\n%s:') % node_name)
            for dep in impacting:
                print(augment_format_string(dep, '    %s') % dep)
        else:
            json_out['impacting_modules'][node_name] = []
            for dep in impacting:
                json_out['impacting_modules'][node_name].append(dep)
def mark_reachable_nodes(ea, source_color=COLOR_SOURCE, other_color=COLOR_REACHABLE):
    """Color every block reachable from the block containing ``ea``.

    Reachable blocks get ``other_color``; the block at ``ea`` is colored
    last with ``source_color``.
    """
    reachable = nx.descendants(get_nx_graph(ea), get_block_start(ea))
    for block_start in reachable:
        CodeBlock(block_start).color = other_color
    CodeBlock(ea).color = source_color
def parents(self, gid):
    """Return direct ascendants in the hierarchy for this GeoName ID.

    If the location has no parents in the hierarchy it will attempt to
    find them nonetheless using the following algorithm:

    1. Find all descendants
    2. Find the 1000 nearest locations, if any of them has the same name
       or has more population and it's not a descendant then it's the new
       parent.

    The descendants check is to avoid loops in the hierarchy.  A parent
    found this way is added to the hierarchy as a new edge; a location for
    which none is found is remembered in ``self._root`` so the search is
    not repeated.

    :return: list of parent ids (possibly empty).
    """
    try:
        # Materialise the result: some networkx versions return an iterator,
        # which is always truthy and cannot be appended to.
        p = list(self._hierarchy.predecessors(gid))
    except nx.NetworkXError:
        p = []

    if not p and gid not in self._root:
        name = self.name(gid)
        population = self.population(gid)
        try:
            descendants = nx.descendants(self._hierarchy, gid)
        except nx.NetworkXError:
            descendants = set()

        for neighbor in self.nearest(gid, 1000):
            match_name = self.name(neighbor) == name
            bigger = (population > 0) and (self.population(neighbor) > population)
            if (match_name or bigger) and neighbor not in descendants:
                p.append(neighbor)
                self._hierarchy.add_edge(neighbor, gid)
                break

        if not p:
            self._root.add(gid)

    return p
def subtree(G, node):
    """Build two subgraphs of ``G`` centred on ``node``.

    Returns ``(S, GS)``:

    * ``S``  -- ``node`` plus those of its descendants whose ancestors (with
      ``node`` itself removed from the graph) are all descendants of
      ``node`` too.
    * ``GS`` -- the nodes of ``S`` together with the nodes on a shortest
      path from the node named "REPO" to each direct predecessor of
      ``node``, with a "fontcolor" attribute set on the predecessors and on
      the nodes of ``S``.  The "REPO" node is removed before returning.

    NOTE(review): "REPO" is presumably the root of the graph; this function
    requires it to exist in ``G`` -- confirm against callers.
    """
    # Copy without ``node`` so ancestor searches ignore paths through it.
    GS = G.copy()
    GS.remove_node(node)
    sd = nx.descendants(G, node)
    sd.add(node)
    # ``s`` is pruned below while ``sd`` stays the full descendant set.
    s = set(sd)
    S = G.subgraph(s).copy()
    for n in sd:
        if n == node:
            continue
        ns = nx.ancestors(GS, n)
        # Drop descendants that have an ancestor outside the descendant set.
        if not ns.issubset(sd):
            S.remove_node(n)
            s.discard(n)
    pn = set(G.predecessors_iter(node))
    # All nodes lying on a shortest path from "REPO" to a direct predecessor.
    gs = set(
        itertools.chain.from_iterable(
            nx.shortest_path(G, "REPO", n) for n in pn
        ))
    GS = G.subgraph(gs.union(s)).copy()
    # Predecessors outside the kept set get one color ...
    for n in pn.difference(s):
        GS.node[n]["fontcolor"] = "#FF0000"
    # ... and the kept nodes another.
    for n in s:
        GS.node[n]["fontcolor"] = "#006600"
    GS.remove_node("REPO")
    return S, GS
def modify_downstream_edges_faster(G,source,modified_edges,time_to_solve,og_delay):
    """Push delays along the edges downstream of ``source``, in place.

    For ``source`` and each of its descendants, the largest incoming edge
    weight (via ``get_max_incoming_weight``) is taken as the moment the node
    is ready to solve.  Nodes are then visited in ascending order of that
    value; every outgoing edge to another downstream node whose weight is
    smaller than ``time_to_solve[node] + ready_to_solve`` is raised to that
    value, and the target's ready time is refreshed.

    :param G: directed graph whose edges carry a 'weight' attribute.
    :param source: node at which the change originates.
    :param modified_edges: collection of ``(u, v)`` edges to leave untouched;
        may be empty or None.
    :param time_to_solve: mapping from node to its solve duration.
    :param og_delay: unused by this implementation; kept for interface
        compatibility.
    :return: the same graph object ``G``.
    """
    # The source itself is processed together with its descendants.
    downstream_nodes = [source] + list(nx.descendants(G, source))
    # Set copy for O(1) membership tests in the inner loop.
    downstream_set = set(downstream_nodes)

    # When each downstream node is ready to solve, ordered by that time.
    ready_to_solve_all = {
        node: get_max_incoming_weight(G, node) for node in downstream_nodes
    }
    ready_to_solve_all = dict(
        sorted(ready_to_solve_all.items(), key=lambda item: item[1]))

    # Values are updated while iterating (keys never change), so nodes
    # visited later observe the refreshed ready times.
    for node, ready_to_solve in ready_to_solve_all.items():
        for u, v in G.out_edges(node):
            if v not in downstream_set:
                continue
            if modified_edges and (u, v) in modified_edges:
                continue
            delay = time_to_solve[node] + ready_to_solve - G[u][v]['weight']
            if delay > 0.0:
                G[u][v]['weight'] += delay
                ready_to_solve_all[v] = get_max_incoming_weight(G, v)

    return G
def subgraph_needed_for(self, start_at, end_at):
    """Find the subgraph of all dependencies to run these tasks.

    * only ``start_at`` resolves to a task: that task and its descendants
    * only ``end_at`` resolves to a task: that task and its ancestors
    * both resolve to the same task: just that task
    * both resolve: every task on a simple path from start to end

    Returns a new graph.

    :raises AssertionError: if neither id is given.
    :raises ValueError: if neither id is known to ``self.task_dict``.
    """
    assert start_at or end_at, "one of {start_at,end_at} must be a task id"
    start, end = map(self.task_dict.get, [start_at, end_at])

    if start is None and end is None:
        # Previously this fell through and failed with an unbound local.
        raise ValueError(
            "neither start_at=%r nor end_at=%r is a known task id"
            % (start_at, end_at))

    if start is None or end is None:
        graph = self.get_networkx_graph()
        if start is not None:
            task_subset = nx.descendants(graph, start)
            task_subset.add(start)
        else:
            task_subset = nx.ancestors(graph, end)
            task_subset.add(end)
    elif start == end:
        task_subset = set([start])
    else:
        graph = self.get_networkx_graph()
        task_subset = set()
        for path in nx.all_simple_paths(graph, start, end):
            task_subset.update(path)

    # make sure the tasks are added to the subgraph in the same
    # order as the original configuration file
    tasks_kwargs_list = [task.yaml_data for task in self.task_list
                         if task in task_subset]
    subgraph = TaskGraph(self.config_path, tasks_kwargs_list)
    return subgraph
def forward_reachable(self, state):
    """Return the set of states reachable from ``state``.

    Thin wrapper around networkx.descendants (iterated post()).
    """
    return nx.descendants(self, state)
def getReachability(G):
    """Return the average reachability of the nodes of ``G``.

    A node's reachability is the number of its descendants divided by the
    total number of nodes in the graph.
    """
    nodes = list(G.nodes())
    # float() keeps the ratio fractional even under Python 2 integer division
    numNodes = float(len(nodes))
    reaches = [len(nx.descendants(G, node)) / numNodes for node in nodes]
    return mean(reaches)
def getcentral(g1):
    """Tabulate simple centrality measures for every node of ``g1``.

    Columns: number of ancestors ('anc'), number of descendants ('des'),
    in-degree ('indeg') and out-degree ('outdeg').
    """
    ancestor_counts = {}
    for x in g1.nodes():
        ancestor_counts[x] = len(nx.ancestors(g1, x))

    descendant_counts = {}
    for x in g1.nodes():
        descendant_counts[x] = len(nx.descendants(g1, x))

    columns = {
        u'anc': ancestor_counts,
        u'des': descendant_counts,
        u'indeg': g1.in_degree(),
        u'outdeg': g1.out_degree(),
    }
    return pd.DataFrame(columns)
def mark_reaching_nodes(ea, source_color=COLOR_SOURCE, other_color=COLOR_REACHING):
    """Color every block from which the block containing ``ea`` is reachable.

    The search runs over the reversed graph.  Reaching blocks get
    ``other_color``; the block at ``ea`` is colored last with
    ``source_color``.
    """
    reversed_graph = get_nx_graph(ea).reverse()
    reaching = nx.descendants(reversed_graph, get_block_start(ea))
    for block_start in reaching:
        CodeBlock(block_start).color = other_color
    CodeBlock(ea).color = source_color
def compute_document_pair_influences(self,delta): #See thm 1 - Beyond Keyword Influence num_samples =int( math.ceil((2/delta*delta)*math.log((len(self.concept_graph.nodes()*(len(self.concept_graph.nodes())-1))/delta) ))) print "Using ", num_samples," graph samples to compute pairwise differences" for c in self.concepts: all_pairs_descendant_count = {} #Compute graph samples and take influence readings for x in range(num_samples): #Uniformly sample edges of concept graph according to weight of egdes of type c sample = self.sample_concept_graph(c) for u in sample.nodes(): for v in sample.nodes(): # Only use (u,v) pairs so as to not double count if (u == v or (v,u) in all_pairs_descendant_count): continue #Setup dictionary entry if ( (u,v) not in all_pairs_descendant_count): all_pairs_descendant_count[(u,v)] = 0 #First approach, using nx libs, slow but it works # descendants_u = nx.descendants(sample,u) descendants_v = nx.descendants(sample,v) descendants_u.add(u) descendants_v.add(v) #Look for one common descendants, break and add to count if found common_desc_found = 0 for desc in descendants_u: if desc in descendants_v: common_desc_found = 1 break if common_desc_found == 1: all_pairs_descendant_count[(u,v)] = all_pairs_descendant_count[(u,v)] + 1 #Iterate through all pairs and add weights to the influence graph (wrt to concept c) for u,v in all_pairs_descendant_count.keys(): #Add u,v weight wrt concept c according to number of samples counted that had a common descendant self.influence_graph.edge[u][v]['weights'][c] = float(all_pairs_descendant_count[(u,v)]) / num_samples
def _find_necessary_steps(self, outputs, inputs): """ Determines what graph steps need to pe run to get to the requested outputs from the provided inputs. Eliminates steps that come before (in topological order) any inputs that have been provided. Also eliminates steps that are not on a path from the provided inputs to the requested outputs. :param list outputs: A list of desired output names. This can also be ``None``, in which case the necessary steps are all graph nodes that are reachable from one of the provided inputs. :param dict inputs: A dictionary mapping names to values for all provided inputs. :returns: Returns a list of all the steps that need to be run for the provided inputs and requested outputs. """ if not outputs: # If caller requested all outputs, the necessary nodes are all # nodes that are reachable from one of the inputs. Ignore input # names that aren't in the graph. necessary_nodes = set() for input_name in iter(inputs): if self.graph.has_node(input_name): necessary_nodes |= nx.descendants(self.graph, input_name) else: # If the caller requested a subset of outputs, find any nodes that # are made unecessary because we were provided with an input that's # deeper into the network graph. Ignore input names that aren't # in the graph. unnecessary_nodes = set() for input_name in iter(inputs): if self.graph.has_node(input_name): unnecessary_nodes |= nx.ancestors(self.graph, input_name) # Find the nodes we need to be able to compute the requested # outputs. Raise an exception if a requested output doesn't # exist in the graph. necessary_nodes = set() for output_name in outputs: if not self.graph.has_node(output_name): raise ValueError("graphkit graph does not have an output " "node named %s" % output_name) necessary_nodes |= nx.ancestors(self.graph, output_name) # Get rid of the unnecessary nodes from the set of necessary ones. necessary_nodes -= unnecessary_nodes # Return an ordered list of the needed steps. 
return [step for step in self.steps if step in necessary_nodes]
def descendants(self, include_self=False):
    """
    :return: every task that descends from this one in the workflow's task
        graph: a set by default, or (with ``include_self``) a list that
        also contains this task, sorted by ``task.stage.number``
    """
    reachable = nx.descendants(self.workflow.task_graph(), self)
    if not include_self:
        return reachable
    return sorted({self} | reachable, key=lambda task: task.stage.number)
def is_child_of(self, nodes, target_node):
    "Return True if target_node is one of `nodes` or a descendant of any of them. Otherwise, False"
    covered = set()
    for origin in nodes:
        covered |= {origin} | nx.descendants(self.graph, origin)
    return target_node in covered
def detect_deadlock(self):
    """
    Report whether the system is deadlocked, i.e. whether ``self.digraph``
    contains a knot: a strongly connected component that cannot be left.
    A single node counts only if its sole successor is itself.

    Note that this code is taken and adapted from the NetworkX Developer Zone Ticket #663 knot.py (09/06/2015)

        >>> from import_params import load_parameters
        >>> Q = Simulation(load_parameters('tests/datafortesting/logs_test_for_simulation/'))
        >>> nodes = ['A', 'B', 'C', 'D', 'E']
        >>> connections = [('A', 'D'), ('A', 'B'), ('B', 'E'), ('C', 'B'), ('E', 'C')]
        >>> for nd in nodes:
        ...     Q.digraph.add_node(nd)
        >>> for cnctn in connections:
        ...     Q.digraph.add_edge(cnctn[0], cnctn[1])
        >>> Q.detect_deadlock()
        True

        >>> Q = Simulation(load_parameters('tests/datafortesting/logs_test_for_simulation/'))
        >>> nodes = ['A', 'B', 'C', 'D']
        >>> connections = [('A', 'B'), ('A', 'C'), ('B', 'C'), ('B', 'D')]
        >>> for nd in nodes:
        ...     Q.digraph.add_node(nd)
        >>> for cnctn in connections:
        ...     Q.digraph.add_edge(cnctn[0], cnctn[1])
        >>> Q.detect_deadlock()
        False

        >>> Q = Simulation(load_parameters('tests/datafortesting/logs_test_for_simulation/'))
        >>> nodes = ['A', 'B']
        >>> for nd in nodes:
        ...     Q.digraph.add_node(nd)
        >>> Q.detect_deadlock()
        False
        >>> connections = [('A', 'A')]
        >>> for cnctn in connections:
        ...     Q.digraph.add_edge(cnctn[0], cnctn[1])
        >>> Q.detect_deadlock()
        True
    """
    for component in nx.strongly_connected_component_subgraphs(self.digraph):
        members = set(component.nodes())
        if len(members) == 1:
            (only,) = members
            # an isolated node is a knot only when it loops onto itself
            if set(self.digraph.successors(only)) == members:
                return True
        elif all(nx.descendants(self.digraph, member) <= members
                 for member in members):
            # nothing reachable from the component lies outside of it
            return True
    return False
def _get_paths(self): root = self._get_root() nodes = self.nodes() if not self.node[root]['data'].is_self: nodes.pop(nodes.index(root)) for n in nodes: for desc in nx.descendants(self, n): path = list(nx.all_simple_paths(self, n, desc))[0] yield path
def mark_unreachable_nodes(ea, source_color=COLOR_SOURCE, other_color=COLOR_UNREACHABLE):
    """Color every block of the flow chart at ``ea`` that is not reachable
    from the block containing ``ea``.

    Such blocks get ``other_color``; the block at ``ea`` is colored last
    with ``source_color``.
    """
    reachable = nx.descendants(get_nx_graph(ea), get_block_start(ea))
    for block in FlowChart(ea):
        if block.startEA in reachable:
            continue
        block.color = other_color
    CodeBlock(ea).color = source_color
def output_exception_classes():
    """Print one ``%exceptionclass`` line for "storage::Exception" and for
    each of its descendants in ``utils.classes``, sorted by name, followed
    by an empty line."""
    base = "storage::Exception"
    exception_classnames = networkx.descendants(utils.classes, base) | {base}
    for classname in sorted(exception_classnames):
        print("%exceptionclass " + classname + ";")
    print("")
def mark_not_reaching_nodes(ea, source_color=COLOR_SOURCE, other_color=COLOR_NOT_REACHING):
    """Color every block from which the block containing ``ea`` cannot be
    reached.

    The search runs over the reversed graph.  Non-reaching blocks get
    ``other_color``; the block at ``ea`` is colored last with
    ``source_color``.
    """
    reversed_graph = get_nx_graph(ea).reverse()
    reaching = nx.descendants(reversed_graph, get_block_start(ea))
    for node_ea in reversed_graph.nodes_iter():
        if node_ea in reaching:
            continue
        CodeBlock(node_ea).color = other_color
    CodeBlock(ea).color = source_color
def descendants(self, include_self=False):
    """
    :return: every stage that descends from this one in the execution's
        stage graph: a set by default, or (with ``include_self``) a list
        that also contains this stage, sorted by ``stage.number``
    """
    reachable = nx.descendants(self.execution.stage_graph(), self)
    if not include_self:
        return reachable
    return sorted({self} | reachable, key=lambda stage: stage.number)
def naive_spatial_distances(self, G, max_dist=None):
    """Fill in separations between node pairs whose entry is zero.

    Reads the NxN matrix ``self.time_separations`` and, for every pair
    ``i < j`` whose entry is 0, writes a symmetric value:

    * if the two nodes share both a common ancestor and a common
      descendant: the square of the smallest ``self.longest_path(G, k, l)``
      over all common ancestors ``k`` and common descendants ``l`` (a zero
      path length is replaced by ``max_dist``);
    * otherwise ``max_dist``.

    Parameters:
        G        - directed graph; row/column ``i`` corresponds to the i-th
                   node of ``G.nodes()``
        max_dist - fallback separation; defaults to the largest absolute
                   entry of the matrix

    The result is stored in ``self.minkowski_separations``; nothing is
    returned.

    NOTE(review): the matrix is modified in place, so
    ``self.time_separations`` and ``self.minkowski_separations`` end up
    referring to the same array -- confirm this aliasing is intended.
    """
    M = self.time_separations
    n = M.shape[0]
    node_list = list(G.nodes())
    if max_dist is None:
        max_dist = np.max(np.abs(M))

    for i in range(n):
        # start at i + 1: the diagonal is never rewritten
        for j in range(i + 1, n):
            if M[i, j] != 0:
                continue

            past_i = nx.ancestors(G, node_list[i])
            past_j = nx.ancestors(G, node_list[j])
            common_past = list(past_i.intersection(past_j))
            future_i = nx.descendants(G, node_list[i])
            future_j = nx.descendants(G, node_list[j])
            common_future = list(future_i.intersection(future_j))

            if common_past and common_future:
                naive_dists = []
                for k in common_past:
                    for l in common_future:
                        L_kl = self.longest_path(G, k, l)
                        naive_dists.append(L_kl if L_kl != 0 else max_dist)
                S_ij = np.min(naive_dists)**2
            else:
                S_ij = max_dist

            M[i, j] = S_ij
            M[j, i] = S_ij

    self.minkowski_separations = M
def main(args):
    """Audit the feedstocks that descend from "pypy-meta" in the graph.

    Nodes are visited from most to fewest descendants.  For each node that
    is not archived, has a version, lists "python" among its run
    requirements and has no audit file yet, the result of
    ``audit_feedstock`` (or the exception details if it fails) is written
    to ``audits/<node>_<version>.json``.

    The loop stops once the time elapsed since START_TIME (environment,
    defaulting to the moment this function started) exceeds TIMEOUT
    seconds (environment, default 30 minutes).

    :param args: unused.
    """
    gx = load_graph()
    ctx = MigratorSessionContext("", "", "")
    start_time = time.time()

    # Loop invariants: parse the environment, create the output directory
    # and list the existing audits once instead of once per node.
    started_at = int(env.get("START_TIME", start_time))
    timeout = int(env.get("TIMEOUT", 60 * 30))
    os.makedirs("audits", exist_ok=True)
    existing_audits = set(os.listdir("audits"))

    # limit graph to things that depend on python
    python_des = nx.descendants(gx, "pypy-meta")
    for node in sorted(
        python_des,
        key=lambda x: (len(nx.descendants(gx, x)), x),
        reverse=True,
    ):
        if time.time() - started_at > timeout:
            break
        # depfinder only work on python at the moment so only work on things
        # with python as runtime dep
        with gx.nodes[node]["payload"] as payload:
            version = payload.get('version', None)
            if (payload.get("archived", False)
                    or not version
                    or "python" not in payload["requirements"]["run"]
                    or f'{node}_{version}.json' in existing_audits):
                continue
            print(node)
            fctx = FeedstockContext(package_name=node,
                                    feedstock_name=payload["name"],
                                    attrs=payload)
            try:
                deps = audit_feedstock(fctx, ctx)
            except Exception as e:
                deps = {
                    "exception": str(e),
                    "traceback": str(traceback.format_exc()).split("\n"),
                }
            # Written after the try block rather than in ``finally`` so
            # that an interrupt (KeyboardInterrupt, SystemExit) propagates
            # instead of failing on an unbound ``deps``.
            with open(f"audits/{node}_{version}.json", "w") as f:
                dump(deps, f)
def processSSDD(self, **kwargs):
    """Compute, once for all terms, the T-values of the SSDD approach by
    Xu et al.

    The root ``self.oroot`` gets 1.0.  Levels from ``self.DicLevels`` are
    then processed in descending order, skipping the first; a term's value
    is the mean over its parents ``p`` of ``w * T[p] / wp`` where ``w`` and
    ``wp`` are one plus the number of descendants of the term and of the
    parent in ``self.DagStr``.

    Returns a dictionary: key (term)/value (score) mapping.
    """
    tinfo = {self.oroot: 1.0e+00}
    levels = sorted(set(self.DicLevels.values()), reverse=True)
    for level in levels[1:]:
        for term, term_level in self.DicLevels.items():
            if term_level != level:
                continue
            par = [self.Dag.index(s)
                   for s in self.ontodata[self.Dag[term]].parents.id]
            w = 1 + len(nx.descendants(self.DagStr, term))
            ss = 0.0
            for p in par:
                ss += w * tinfo[p] / (1 + len(nx.descendants(self.DagStr, p)))
            tinfo[term] = ss / len(par)
    return tinfo
def _initialize_attraction_basin_dist(self):
    """Count, for each node, how many nodes lie at each weighted distance.

    Returns ``(ab_out_dist, ab_in_dist)``: per node, a Counter of the
    distances to its descendants and a Counter of the distances from its
    ancestors.  Nodes missing from the all-pairs result are skipped.
    """
    graph = self._gnx
    dists = dict(weighted.all_pairs_dijkstra_path_length(graph, len(graph), weight='weight'))

    ab_out_dist, ab_in_dist = {}, {}
    for node in graph:
        if node in dists:
            from_node = dists[node]
            ab_out_dist[node] = Counter(
                [from_node[d] for d in nx.descendants(graph, node)])
            ab_in_dist[node] = Counter(
                [dists[d][node] for d in nx.ancestors(graph, node)])
    return ab_out_dist, ab_in_dist
def remove_states_no_reaching(G, terminals):
    """Remove from ``G``, in place, every non-terminal node that has no
    terminal among its descendants.

    The scan is repeated until a pass removes nothing.  ``terminals`` must
    support set intersection.
    """
    while True:
        doomed = {
            n for n in G.nodes
            if n not in terminals
            and not (terminals & set(nx.descendants(G, n)))
        }
        if not doomed:
            return
        for n in doomed:
            G.remove_node(n)
def has_operator(u, g, operators):
    """Return the first operator from ``operators`` found at ``u`` or at a
    non-terminal descendant of ``u`` in ``g``; None if there is none.

    ``u`` itself may lack an ``operator`` attribute.
    """
    try:
        own_operator = u.operator
        if own_operator in operators:
            return own_operator
    except AttributeError:
        pass

    for v in nx.descendants(g, u):
        # skip terminals (nodes without successors); only the remaining
        # nodes are inspected for an operator
        if g.succ.get(v) and v.operator in operators:
            return v.operator
    return None
def get_nodes_from_spec(graph, spec):
    """Resolve a selector spec to a set of graph nodes.

    The spec's filter (by qualified name or by tag) picks the initial
    nodes; ``select_parents`` / ``select_children`` add all their ancestors
    / descendants.  Direct successors of any selected node whose
    'resource_type' is ``NodeType.Test`` are included as well.  An unknown
    filter type is logged and selects nothing.
    """
    filter_map = {
        SELECTOR_FILTERS.FQN: get_nodes_by_qualified_name,
        SELECTOR_FILTERS.TAG: get_nodes_by_tag,
    }
    want_parents = spec['select_parents']
    want_children = spec['select_children']
    node_filter = spec['filter']

    filter_func = filter_map.get(node_filter['type'])
    if filter_func is not None:
        selected_nodes = set(filter_func(graph, node_filter['value']))
    else:
        valid_selectors = ", ".join(filter_map.keys())
        logger.info("The '{}' selector specified in {} is invalid. Must be "
                    "one of [{}]".format(node_filter['type'], spec['raw'],
                                         valid_selectors))
        selected_nodes = set()

    # widen the selection upstream and/or downstream
    additional_nodes = set()
    for node in selected_nodes:
        if want_parents:
            additional_nodes |= nx.ancestors(graph, node)
        if want_children:
            additional_nodes |= nx.descendants(graph, node)
    model_nodes = selected_nodes | additional_nodes

    # include tests that depend on a selected node. if we aren't running
    # tests, they'll be filtered out later.
    test_nodes = {
        successor
        for node in model_nodes
        for successor in graph.successors(node)
        if graph.node.get(successor).get('resource_type') == NodeType.Test
    }
    return model_nodes | test_nodes
def function_to_cfg(self, func):
    """Build three control-flow graphs for one function via radare2.

    The function is located by ``func['vaddr']`` when ``self.use_symbol``
    is set, otherwise by ``func['offset']``, and its blocks are read from
    the JSON returned by the ``agfj`` command.  All three graphs share the
    same nodes (block offsets) and edges ('jump' and 'fail' targets that
    are blocks of this function) but carry different node data produced by
    ``self.process_block``:

    * ``my_cfg``   -- ``asm`` (block bytes) and ``label`` (disassembly)
    * ``acfg``     -- ``features`` (annotations), extended with the node's
      number of descendants ('offspring') and its betweenness centrality
    * ``lstm_cfg`` -- ``features`` (filtered instructions)

    :return: ``(my_cfg, acfg, lstm_cfg)``; three empty graphs when radare2
        returns nothing usable.
    """
    s = 'vaddr' if self.use_symbol else 'offset'

    self.r2.cmd('s ' + str(func[s]))
    try:
        cfg = json.loads(self.r2.cmd('agfj ' + str(func[s])))
    except Exception:
        # Best effort: an unparsable or failed reply means "no graph".
        # (Exception rather than a bare except, so interrupts propagate.)
        cfg = []

    my_cfg = nx.DiGraph()
    acfg = nx.DiGraph()
    lstm_cfg = nx.DiGraph()
    if not cfg:
        return my_cfg, acfg, lstm_cfg
    cfg = cfg[0]

    for block in cfg['blocks']:
        disasm, block_bytes, annotations, filtered_instructions = \
            self.process_block(block)
        my_cfg.add_node(block['offset'], asm=block_bytes, label=disasm)
        acfg.add_node(block['offset'], features=annotations)
        lstm_cfg.add_node(block['offset'], features=filtered_instructions)

    graphs = (my_cfg, acfg, lstm_cfg)
    for block in cfg['blocks']:
        for branch in ('jump', 'fail'):
            # only connect to targets that are blocks of this function
            if branch in block and block[branch] in my_cfg.nodes:
                for graph in graphs:
                    graph.add_edge(block['offset'], block[branch])

    between = nx.betweenness_centrality(acfg)
    for offset, data in acfg.nodes(data=True):
        features = data['features']
        features['offspring'] = len(nx.descendants(acfg, offset))
        features['betweenness'] = between[offset]

    return my_cfg, acfg, lstm_cfg
def ConstructGoAnnotationArray():
    """Build the gene x GO-term label matrix and save it to GO_LABEL_ARR_FILE.

    Entries are 1 for a positive example, -1 for a "non-negative" example
    and 0 otherwise.  Direct annotations come from GO_ANNOTATION_FILE (the
    first line is skipped; column 0 holds comma-separated gene ids and
    column 2 space-separated GO ids, mapped through the alternative-id
    table).  For every positive term, its ancestors in the OBO graph that
    are still 0 are set to -1 and its descendants are set to 1.

    NOTE(review): the dictionaries are loaded with pickle, which must only
    be used on trusted files.
    """
    with open(GENES_DICT_FILE, 'rb') as handle:
        genes_dict = pickle.load(handle)
    with open(GO_DICT_FILE, 'rb') as handle:
        go_dict = pickle.load(handle)
    inv_go_dict = {v: k for k, v in go_dict.items()}
    m = len(genes_dict)
    f = len(go_dict)
    # -1 indicates negative example, 0 is neither positive nor negative
    with open(ALT_IDS_DICT_FILE, 'rb') as handle:
        alt_ids_dic = pickle.load(handle)
    go_labels = np.zeros((m, f))

    with open(GO_ANNOTATION_FILE, 'r') as handle:
        lines = handle.readlines()
    for line in lines[1:]:
        values = line.split('\t')
        for geneid in values[0].split(','):
            if geneid not in genes_dict:
                continue
            goids = [v.strip(';') for v in values[2].split(' ')]
            gene_idx = genes_dict[geneid]
            for go_id in goids:
                new_id = alt_ids_dic.get(go_id, go_id)
                if new_id in go_dict:
                    go_labels[gene_idx, go_dict[new_id]] = 1

    # mark parents as non-negative examples of all children
    obo_graph = obonet.read_obo(OBODB_FILE)
    for i in range(m):
        pos_idxs = np.argwhere(go_labels[i] == 1).flatten()
        for j in pos_idxs:
            goid = inv_go_dict[j]
            # graph ancestors are treated as the term's children here
            for child in networkx.ancestors(obo_graph, goid):
                if child in go_dict:
                    child_idx = go_dict[child]
                    if go_labels[i, child_idx] == 0:
                        # mark as non-negative example
                        go_labels[i, child_idx] = -1
            # graph descendants are treated as the term's parents here
            for parent in networkx.descendants(obo_graph, goid):
                if parent in go_dict:
                    go_labels[i, go_dict[parent]] = 1

    np.savetxt(GO_LABEL_ARR_FILE, go_labels)
    print("Constructed go annotation array with " + str(m) +
          " examples and " + str(f) + " annotations")
def probe_max_dop(self, u, v, unew, vnew, update=False):
    """
    An incremental antichain
    (which appears significantly more efficient than the networkx antichains)
    But only works for DoP, not for weighted width

    When no antichains are cached yet they are computed from scratch with
    ``DAGUtil.get_max_antichains`` and the length of the first one is
    returned (0 if there are none).  Otherwise the cached antichains are
    extended with the new endpoint of the edge ``(u, v)``: ``u`` if ``unew``
    is true, else ``v`` if ``vnew`` is true.

    :param update: when true, store the resulting antichains on ``self``.
    :return: the largest antichain length found.
    :raises SchedulerException: if neither ``unew`` nor ``vnew`` is true, or
        if no antichain results.

    NOTE(review): this source was recovered from a flattened line; the
    nesting of the final ``new_ac.append(ma)`` (loop level, as written
    here, versus inside the ``elif``) should be confirmed against the
    original file.
    """
    if (self._max_antichains is None):
        # nothing cached: compute the antichains from the whole DAG
        new_ac = DAGUtil.get_max_antichains(self._dag)
        if (update):
            self._max_antichains = new_ac
        if (len(new_ac) == 0):
            if (update):
                self._max_antichains = None
            return 0
        else:
            return len(new_ac[0])
    else:
        # nodes related to the new node cannot share an antichain with it
        if (unew):
            ups = nx.descendants(self._dag, u)
            new_node = u
        elif (vnew):
            ups = nx.ancestors(self._dag, v)
            new_node = v
        else:
            raise SchedulerException("u v are both new/old")
        new_ac = []
        md = 1
        for ma in self._max_antichains: # missing elements in the current max_antichains!
            #incremental updates
            found = False
            for n in ma:
                if (n in ups):
                    found = True
                    break
            if (not found):
                # no member is related to the new node: extend a copy
                mma = list(ma)
                mma.append(new_node)
                new_ac.append(mma)
                if (len(mma) > md):
                    md = len(mma)
            elif (len(ma) > md):
                md = len(ma)
            new_ac.append(ma) # carry over, then prune it
        if (len(new_ac) > 0):
            if (update):
                self._max_antichains = new_ac
            return md
        else:
            raise SchedulerException("No antichains")
def make_tree_stru(self, seg_map, adj_list, list_root_ds):
    """Build a directed graph for the current document from its structure.

    :param seg_map: mapping from segment id to the ordered items
        (sentences) of that segment.
    :param adj_list: pairs of segment ids to connect; the edge joins the
        first item of each segment.
    :param list_root_ds: segment ids at the root level.
    :return: the resulting ``nx.DiGraph``.
    """
    cur_tree = nx.DiGraph()  # tree structure for current document

    # root level: register the first item of every root segment ...
    for root_id in list_root_ds:
        cur_tree.add_node(seg_map[root_id][0])

    # ... and connect each root segment to all the later ones
    for pos, src_id in enumerate(list_root_ds):
        for dst_id in list_root_ds[pos + 1:]:
            cur_tree.add_edge(seg_map[src_id][0], seg_map[dst_id][0])

    # inside a segment: chain consecutive sentences
    for cur_seg, sents_seg in seg_map.items():
        for pos in range(len(sents_seg) - 1):
            cur_tree.add_edge(sents_seg[pos], sents_seg[pos + 1])

    # between segments: link their first sentences
    for src_id, dst_id in ((pair[0], pair[1]) for pair in adj_list):
        cur_tree.add_edge(seg_map[src_id][0], seg_map[dst_id][0])

    # connect the successors of every descendant of a root pairwise
    for cur_root in list_root_ds:
        for cur_child in nx.descendants(cur_tree, cur_root):
            siblings = list(cur_tree.successors(cur_child))
            for pos, first in enumerate(siblings):
                for second in siblings[pos + 1:]:
                    cur_tree.add_edge(first, second)

    return cur_tree
def cmd_test(filename, alg, i):
    """Load a rooted instance, print its statistics and time one solver run.

    :param filename: instance file handed to ``get_graph``
    :param alg: one of ``'alg3'``, ``'alg4'`` or ``'alg6'``; any other value
                stops after the closure step
    :param i: value forwarded to the solver as its ``i`` argument
    """
    DG, terms, root = get_graph(filename, with_root=True)
    v = nx.number_of_nodes(DG)
    e = nx.number_of_edges(DG)
    print('root is', root)
    print("Number of vertices: ", v)
    # +1 accounts for the root itself, which is not its own descendant.
    print("Number of reachable vertices: ", len(nx.descendants(DG, root)) + 1)
    print("Number of edges: ", e)
    print('')

    print('apsp started')
    began = time.time()
    tr_cl = trans_clos_dense(DG)
    print('apsp finished in', time.time() - began)

    # Pick the solver; the three variants share one calling convention.
    if alg == 'alg3':
        banner, solver = 'Alg3 with i = ', alg3
    elif alg == 'alg4':
        banner, solver = 'Alg4 with i = ', alg4
    elif alg == 'alg6':
        banner, solver = 'Alg6 with i = ', alg6
    else:
        return

    print(banner, i, 'started')
    began = time.time()
    set_start_time(began)
    terms.sort()
    tree = solver(tr_cl, i=i, k=len(terms), r=root, x=terms)
    print('Elapsed time = ', time.time() - began)
    print('Weight of MSTw = ', tree.size(weight='weight'))
def is_s_reachable(self, d1: str, d2: str) -> bool:
    """
    Determine whether 'D2' is s-reachable from 'D1' (Koller and Milch, 2001)

    A node D2 is s-reachable from a node D1 in a MACID M if there is some
    utility node U ∈ U_D1 ∩ Desc(D1) such that if a new parent D2' were added
    to D2, there would be an active path in M from D2′ to U given
    Pa(D1) ∪ {D1}, where a path is active in a MAID if it is active in the
    same graph, viewed as a BN.

    :param d1: name of the decision node D1
    :param d2: name of the decision node D2
    :return: True if such an active trail exists for at least one utility
             node of D1's agent that descends from D1
    """
    mg = self.mechanism_graph()
    agent = mg.whose_node[d1]
    agent_utilities = mg.utility_nodes_agent[agent]

    # Compute Desc(D1) once instead of once per utility node.
    d1_descendants = nx.descendants(mg, d1)
    descended_agent_utilities = [
        util for util in agent_utilities if util in d1_descendants
    ]

    con_nodes = [d1] + mg.get_parents(d1)
    # The added parent D2' is the node named d2 + "mec" in the mechanism graph.
    return any(
        mg.is_active_trail(d2 + "mec", u_node, con_nodes)
        for u_node in descended_agent_utilities
    )
def getNodeDVByDF(sub_DAG, node, decay_factor=0.5):
    '''
    Compute the DV value of a node in a sub-graph using a decay factor.

    Every descendant of ``node`` contributes ``decay_factor ** d``, where
    ``d`` is its unweighted shortest-path distance from ``node``.

    :param sub_DAG: networkx directed graph, a sub-graph of a directed
                    acyclic graph
    :param node: str, a node of that graph
    :param decay_factor: float, the decay factor; expected to lie between
                         0 and 1, defaults to 0.5
    :return: the summed contribution of all descendants (0 if there are none)
    :raises networkx.NetworkXError: if ``node`` is not in ``sub_DAG``
    '''
    if node not in sub_DAG:
        # Same error type nx.descendants reports for an unknown source node.
        raise nx.NetworkXError(f"The node {node} is not in the graph.")

    # One BFS yields the distance to every reachable node, replacing a
    # separate shortest-path search per descendant.  dict() also copes with
    # networkx releases that return an iterator of (node, distance) pairs.
    distances = dict(nx.single_source_shortest_path_length(sub_DAG, node))
    return sum(
        math.pow(decay_factor, distance)
        for target, distance in distances.items()
        if target != node  # the source itself (distance 0) is not a descendant
    )
def back_flow(self, tx_leaf, tx_root):
    """Propagate a unit value back through the nodes lying between
    ``tx_root`` and ``tx_leaf`` and return the value reaching ``tx_root``.

    The working subgraph holds every node that is both ``tx_leaf`` or one of
    its ancestors and ``tx_root`` or one of its descendants.  Each node starts
    with ``temp = 1``; nodes in every layer after the first are then
    overwritten with the sum, over their successors, of the edge attribute
    ``"p"`` times the successor's ``temp``.

    NOTE(review): uses the networkx 1.x style API (``H.node``, ``H.edge``,
    ``set_node_attributes(G, name, value)``) — confirm the pinned version.

    :return: 0 when the subgraph is empty, otherwise ``temp`` of ``tx_root``
    """
    upstream = {tx_leaf}.union(nx.ancestors(self, tx_leaf))
    downstream = {tx_root}.union(nx.descendants(self, tx_root))
    H = self.subgraph(upstream.intersection(downstream))
    del upstream, downstream  # drop the possibly large sets early

    if len(H.nodes()) == 0:
        return 0

    nx.set_node_attributes(H, "temp", 1)
    # The first layer keeps its initial value of 1.
    for layer in create_layers(H)[1:]:
        for T in layer:
            H.node[T]["temp"] = sum(
                H.edge[T][U]["p"] * H.node[U]["temp"] for U in H.successors(T)
            )
    return H.node[tx_root]["temp"]
def get_all_dvc_files_that_are_not_needed(dvc_filenames):
    """Return the stage paths of the DVC repo in '.' that are not needed.

    A stage is needed when it is one of ``dvc_filenames`` or a descendant of
    one of them in any pipeline graph of the repository.

    Each filename is now checked against each pipeline on its own.  The
    previous version wrapped the whole per-pipeline loop in a bare
    ``except:``, so the first filename missing from a pipeline silently
    skipped every remaining filename for that pipeline.

    :param dvc_filenames: list of stage file paths to keep
    :return: list of ``relpath`` values of all other stages, in the order
             reported by ``Repo.stages``
    """
    from dvc.repo import Repo
    import networkx as nx

    dvcrepo = Repo('.')

    # A set gives O(1) membership tests in the final filter.
    needed = set(dvc_filenames)
    for G in dvcrepo.pipelines:
        for dvc_filename in dvc_filenames:
            # A file that is not part of this pipeline contributes nothing.
            if dvc_filename in G:
                needed.update(nx.descendants(G, dvc_filename))

    all_stages = [s.relpath for s in dvcrepo.stages]
    return [s for s in all_stages if s not in needed]
def get_desc_net_node_cent(self, source=None, exclude_nodes=None, cent_list=None):
    """Build the network of ``source`` plus its descendants and compute its
    node centralities.

    :param source: node whose descendant network is extracted from ``self.g``
    :param exclude_nodes: optional iterable of nodes to leave out
                          (``None`` means exclude nothing)
    :param cent_list: forwarded to ``MutNet.get_node_centralities``
    :return: the ``MutNet`` wrapping the induced subgraph
    """
    nodes_in_graph = nx.descendants(self.g, source)
    nodes_in_graph.add(source)
    if exclude_nodes is not None:
        nodes_in_graph.difference_update(exclude_nodes)

    # NOTE(review): the previous version built a throw-away ``MutNet()`` when
    # the node set was empty and then overwrote it unconditionally.  That dead
    # store is removed; the empty case still goes through the same path as
    # before.  Confirm whether an early return was intended there.
    desc_net = MutNet(net=self.g.subgraph(nodes_in_graph))
    desc_net.get_node_centralities(cent_list=cent_list)
    return desc_net
def has_operator_old(u, g, operators):
    """Return the first operator from `operators` found in AST `u`, else `None`.

    `u` itself is checked first (nodes without an `operator` attribute are
    tolerated), then every descendant of `u` in `g` that has successors.
    """
    try:
        root_op = u.operator
        if root_op in operators:
            return root_op
    except AttributeError:
        # `u` carries no operator; fall through to its descendants.
        pass

    for v in nx.descendants(g, u):
        # Nodes without successors are terminals and are skipped.
        is_operator_node = bool(g.succ.get(v))
        if is_operator_node and v.operator in operators:
            return v.operator
    return None
def _generate_paths(self, path, sink, r_up, w_p, all_paths):
    """Function for recursively generating all (s, t) paths, together with the
    corresponding parameters r_up and w_p, where w_p denotes the total cost
    (length) of path p.

    :param path: nodes visited so far; the search continues from ``path[-1]``
    :param sink: target node t
    :param r_up: repeater nodes already used on this path
    :param w_p: accumulated cost of ``path``
    :param all_paths: output list; receives ``(path, r_up, w_p)`` tuples
    """
    graph = self.graph_container.graph
    here = path[-1]

    # Generate a path from here to the sink t
    (path_cost, sp) = nx.single_source_dijkstra(G=graph, source=here, target=sink,
                                                weight='length')
    if path_cost <= self.L_max:
        all_paths.append((path + sp[1:], r_up, w_p + path_cost))

    if len(r_up) >= self.N_max:
        return

    # Reachability from the current node does not depend on the candidate,
    # so compute it once instead of once per candidate repeater node.
    reachable = nx.descendants(G=graph, source=here)
    for rep_node in self.graph_container.possible_rep_nodes:
        if rep_node in r_up or rep_node not in reachable:
            continue
        (path_cost, sp) = nx.single_source_dijkstra(G=graph, source=here, target=rep_node,
                                                    weight='length')
        if path_cost <= self.L_max:
            self._generate_paths(path=path + sp[1:], sink=sink, r_up=r_up + [rep_node],
                                 w_p=w_p + path_cost, all_paths=all_paths)
def descendants_dependencies(self, filepath):
    """Return the imports of ``filepath`` merged with those of every
    descendant in ``self.digraph``, as sorted lists keyed like
    ``self.imports[filepath]``.

    Results are memoised in ``self._cache['descendants_dependencies']``.
    """
    try:
        return self._cache['descendants_dependencies'][filepath]
    except KeyError:
        pass  # not cached yet: compute below

    merged = deepcopy(self.imports[filepath])
    for descendant in nx.descendants(self.digraph, filepath):
        inherited = self.imports[descendant]
        for key in merged:
            merged[key] |= inherited[key]

    # Replace every collected set with a sorted list.
    for key in merged:
        merged[key] = sorted(merged[key])

    self._cache['descendants_dependencies'][filepath] = merged
    return merged
def can_add(self, parent, name, klass):
    """Tell whether an object called ``name`` of type ``klass`` may be added
    to ``parent``.

    For the model itself only the model namespace is consulted.  For any
    other parent (a UserSpaceImpl), ``name`` must be free in the parent, and
    in every descendant space of the parent an existing ``name`` must already
    be an instance of ``klass``.

    The previous version looked up the descendant's node id (``desc``)
    in the descendant namespace instead of ``name``, so the type check on
    existing members never looked at the name being added.

    :return: True if the addition is allowed, False otherwise
    """
    if parent is self.model:
        return name not in parent.namespace

    # parent is UserSpaceImpl
    if name in parent.namespace:
        return False

    node = parent.get_fullname(omit_model=True)
    for desc in nx.descendants(self._graph, node):
        ns = self._graph.to_space(desc).namespace
        if name in ns and not isinstance(ns[name], klass):
            return False
    return True
def print_impacting_modules(single_node=None):
    """
    For each module, print the modules it depends on, i.e. the modules whose
    change can potentially impact it.  All levels of dependency are shown,
    not just the immediately imported modules.

    :param single_node: when given, only this module is reported
    :return:
    """
    print('\n===Impacting Modules===')
    for node_name in G.nodes():
        selected = (not single_node) or node_name == single_node
        if not selected:
            continue
        impacting = nx.descendants(G, node_name)
        print(augment_format_string(node_name, '\n%s:') % node_name)
        for module in impacting:
            print(augment_format_string(module, ' %s') % module)
def solve_entry_tips(grap, starting_nd):
    """
    Remove uninteresting entry tips, keeping only the pertinent ones.

    For every starting node, the descendants that have at least two
    predecessors are collected, and every simple path from the starting
    node to each collected node is recorded together with its length and
    the value returned by ``path_average_weight``.  The recorded paths are
    then handed to ``select_best_path``.

    NOTE(review): the block structure was reconstructed from a
    whitespace-mangled source; confirm the nesting of the path-collection
    loop and of the ``select_best_path`` call against the original file.

    Parameters
    ----------
    grap : networkX graph
        Graph obtained from the NetworkX module.
    starting_nd : list
        list of starting nodes.

    Returns
    -------
    grap : networkX graph
        Graph returned by ``select_best_path`` (called with
        ``delete_entry_node=True`` and ``delete_sink_node=False``).
    """
    ancestors = []
    paths = []
    path_l = []
    path_w = []
    for node in starting_nd:
        # A `while` formulation was considered tricky here, hence the for loop.
        for des in nx.descendants(grap, node):
            n_predecessor = grap.pred[des]
            # Keep each node with two or more predecessors once.
            if len(n_predecessor) >= 2 and des not in ancestors:
                ancestors.append(des)
        # `ancestors` accumulates across starting nodes.
        for anc in ancestors:
            for path in nx.all_simple_paths(grap, node, anc):
                path_w.append(path_average_weight(grap, path))
                path_l.append(len(path))
                paths.append(path)
    grap = select_best_path(grap, paths, path_l, path_w, delete_entry_node=True, delete_sink_node=False)
    return grap
def add(self, u, v, gu, gv, sequential=False, global_dag=None):
    """
    Add nodes u and/or v (and the edge u -> v) to the partition.

    If sequential is True and a global_dag is given, break antichains into
    sequential chains by adding extra edges, which are mirrored into
    global_dag with weight 0 unless they would make it cyclic.

    :param u: source node of the edge
    :param v: target node of the edge
    :param gu: attribute mapping of u; ``"weight"`` and ``"num_cpus"`` are read
    :param gv: attribute mapping of v; ``"weight"`` and ``"num_cpus"`` are read
    :param sequential: whether to serialise potential antichains
    :param global_dag: the original global graph, modified in place
    """
    uw = gu["weight"]
    vw = gv["weight"]
    # Graph membership works on every networkx release; the ``.node``
    # attribute used previously was removed in NetworkX 2.4.
    unew = u not in self._dag
    vnew = v not in self._dag
    self._dag.add_node(u, weight=uw, num_cpus=gu["num_cpus"])
    self._dag.add_node(v, weight=vw, num_cpus=gv["num_cpus"])
    self._dag.add_edge(u, v)

    if unew and vnew:
        # we know this is fast
        self._max_antichains = DAGUtil.get_max_antichains(self._dag)
        self._max_dop = 1
        return

    if sequential and (global_dag is not None):
        # break potential antichain to sequential chain
        if unew:
            for vup in nx.ancestors(self._dag, v):
                if u == vup:
                    continue
                if not list(self._dag.predecessors(vup)):
                    # link u to "root" parent of v to break antichain
                    self._dag.add_edge(u, vup)
                    # change the original global graph
                    global_dag.add_edge(u, vup, weight=0)
                    if not nx.is_directed_acyclic_graph(global_dag):
                        global_dag.remove_edge(u, vup)
        else:
            for udo in nx.descendants(self._dag, u):
                if udo == v:
                    continue
                if not list(self._dag.successors(udo)):
                    # link "leaf" children of u to v to break antichain
                    self._dag.add_edge(udo, v)
                    # change the original global graph
                    global_dag.add_edge(udo, v, weight=0)
                    if not nx.is_directed_acyclic_graph(global_dag):
                        global_dag.remove_edge(udo, v)

    self._max_dop = self.probe_max_dop(u, v, unew, vnew, update=True)
def _cut_biggest_rep(self, node_val_dict, thresh=.9):
    """ gets the largest nodes whose value is >= thresh times the mean value
    of the leaves below them

    Nodes are visited as the integers ``0 .. len(self) - 1``; a node's
    neighbors are expected to have been visited before the node itself.

    Args:
        node_val_dict (dict): keys are nodes, values are floats
        thresh (float): fraction of the leaf mean a node's value must reach
            for the node to be a valid representative

    Returns:
        node_list (list): valid nodes, highest index first, none of which
            is a descendant of another
    """
    leaf_sum = defaultdict(int)
    leaf_count = defaultdict(int)
    candidates = set()
    total = len(self)

    for node in range(total):
        children = list(self.neighbors(node))
        if not children:
            # leaf node, valid by default (has no constituents)
            candidates.add(node)
            leaf_sum[node] = node_val_dict[node]
            leaf_count[node] = 1
            continue

        # non-leaf node: aggregate the leaf statistics of its children
        leaf_sum[node] = sum(leaf_sum[child] for child in children)
        leaf_count[node] = sum(leaf_count[child] for child in children)
        required = thresh * leaf_sum[node] / leaf_count[node]
        if node_val_dict[node] >= required:
            candidates.add(node)

    # Walk from the highest index down; choosing a node disqualifies
    # everything beneath it.
    chosen = []
    for node in range(total - 1, -1, -1):
        if node not in candidates:
            continue
        chosen.append(node)
        candidates.difference_update(nx.descendants(self, node))
    return chosen
def __generate_hash__(self, node):
    """
    For a provided node in the class' graph, generates its corresponding
    data-hash.  The hash covers the node's own ``"data"`` followed by the
    ``"dataHash"`` of every descendant that has one.

    Descendant hashes are concatenated in sorted order so that the result
    does not depend on set iteration order.  The previous version
    concatenated them in the arbitrary order ``nx.descendants`` returned
    them.

    :param node: The candidate node
    :return: None (the hash is stored in the node's ``"dataHash"`` attribute)
    """
    nodes = self.graph.nodes
    descendant_hashes = sorted(
        digest
        for digest in (nodes[elem]["dataHash"]
                       for elem in nx.descendants(self.graph, node))
        if digest is not None
    )
    # Start with my data, then append all descendant nodes' hashes
    data = nodes[node]["data"] + "".join(descendant_hashes)
    nodes[node]["dataHash"] = hashlib.sha3_256(
        data.encode('utf-8')).hexdigest()
def create_subgraph(G, src='Start', dest='Finish', title=None, begin=None, deadline=None):
    """Return a new ``nx.DiGraph`` holding the nodes of G between src and dest.

    The node set consists of ``src``, ``dest`` and every node that is both an
    ancestor of ``dest`` and a descendant of ``src``.

    :param G: source graph; ``G.graph`` supplies the default metadata
    :param src: start node
    :param dest: end node
    :param title: value for ``H.graph['Title']``; defaults to G's
    :param begin: value for ``H.graph['Begin']``; defaults to G's
    :param deadline: value for ``H.graph['Deadline']``; defaults to G's
    :return: the new graph H
    """
    # First we're going to search for all the nodes in between
    # src and dest, including the src and dest.  The descendant set is
    # computed once (and only if there is anything to filter) rather than
    # once per ancestor.
    upstream_of_dest = nx.ancestors(G, dest)
    downstream_of_src = nx.descendants(G, src) if upstream_of_dest else set()
    nodes = [node for node in upstream_of_dest if node in downstream_of_src]
    nodes.extend([src, dest])

    # Then we create a new graph from G which only contains
    # the nodes we searched previously
    H = nx.DiGraph(G.subgraph(nodes))

    # Set the Title, Begin and Deadline for the new graph.
    # If the values are not provided, use the values from G.
    H.graph['Title'] = G.graph['Title'] if title is None else title
    H.graph['Begin'] = G.graph['Begin'] if begin is None else begin
    H.graph['Deadline'] = G.graph['Deadline'] if deadline is None else deadline
    return H
def _build(G, target=None, full=False):
    """Return the graph to display, with nodes relabelled to ``stage.addressing``.

    Without ``target`` the whole graph ``G`` is used.  With ``target`` the
    pipeline containing it is used and, unless ``full`` is set, reduced to
    ``target`` and its descendants.  Relabelling is done with ``copy=False``,
    i.e. on the selected graph object itself.
    """
    import networkx as nx

    from dvc.repo.graph import get_pipeline, get_pipelines

    if not target:
        H = G
    else:
        H = get_pipeline(get_pipelines(G), target)
        if not full:
            keep = nx.descendants(G, target)
            keep.add(target)
            H.remove_nodes_from(set(G.nodes()) - keep)

    return nx.relabel_nodes(H, lambda stage: stage.addressing, copy=False)
def revealing_or_denying(macid: MACID, decision: str,
                         effective_set: List[str]) -> bool:
    """checks to see whether this decision is motivated by an incentive for
    revealing or denying

    Graphical Criterion:
    1) There is a directed decision-free path from D_A to an effective
       decision node D_B.
    2) There is a directed, effective path from D_B to U_A.
    3) There is an effective indirect front-door path π from D_A to U_B that
       is not blocked by D_B U W^{D_A}_{D_B}.

    :param macid: the MACID to inspect
    :param decision: name of the decision node D_A
    :param effective_set: names of the effective decision nodes
    :return: True if the criterion holds for some D_B, False otherwise
    :raises Exception: if ``decision`` or any node of ``effective_set`` is
                       not present in ``macid``
    """
    if decision not in macid.nodes:
        raise Exception(f"{decision} is not present in the macid")

    if not all(node in macid.nodes for node in effective_set):
        raise Exception(
            "One or many of the nodes in the effective_set are not present in the macid."
        )

    agent = macid.whose_node[decision]
    agent_utils = macid.utility_nodes_agent[agent]

    # Candidate D_B nodes: every other effective decision reachable from D_A
    # through a directed decision-free path.
    list_decs = copy.deepcopy(macid.all_decision_nodes)
    list_decs.remove(decision)
    reachable_decisions = [
        dec_reach for dec_reach in list_decs
        if dec_reach in effective_set
        and directed_decision_free_path(macid, decision, dec_reach)
    ]

    # Desc(D_A) depends only on `decision`; compute it once instead of once
    # per parent inside the innermost loop.
    decision_descendants = nx.descendants(macid, decision)

    for decision_b in reachable_decisions:
        agent_b = macid.whose_node[decision_b]
        agent_b_utils = macid.utility_nodes_agent[agent_b]
        # Parents of D_B that do not descend from D_A (depends only on D_B).
        decision_b_parents_not_desc_decision = [
            node for node in macid.get_parents(decision_b)
            if node not in decision_descendants
        ]
        for u in agent_utils:
            if not _effective_dir_path_exists(macid, decision_b, u,
                                              effective_set):
                continue
            for u_b in agent_b_utils:
                # A fresh list per call, as before.
                cond_nodes = [decision_b] + decision_b_parents_not_desc_decision
                if is_active_indirect_frontdoor_trail(
                        macid, decision, u_b, cond_nodes):
                    return True
    return False
def check_inputs(G, input_nodes):
    '''Given a networkx graph G and a set of input_nodes, checks whether the
    inputs are valid.

    The inputs are valid when every input node is in G and the input nodes
    together with their descendants cover all nodes of G.

    :param G: networkx graph
    :param input_nodes: iterable of candidate input nodes
    :raises ValueError: if an input node is not in G, or if some node of G is
                        neither an input node nor a descendant of one
    '''
    input_nodes = list(input_nodes)  # iterated twice below
    graph_nodes = set(G.nodes())

    if any(n not in graph_nodes for n in input_nodes):
        raise ValueError(
            'The input nodes provided must all be in the graph.')

    input_cover = set()
    for n in input_nodes:
        input_cover.add(n)
        input_cover.update(nx.descendants(G, n))

    missing = graph_nodes - input_cover
    if missing:
        # str() keeps non-string node ids from raising TypeError in join();
        # sorting makes the message deterministic.
        missed_nodes = ', '.join(sorted(str(n) for n in missing))
        raise ValueError(
            'Not all valid input nodes have been provided, as the following nodes will not receive any data: {}'
            .format(missed_nodes))