def traverse_graph(g, edge, delta, upstream=True):
    """Traverse the graph in a breadth-first-search manner

    The graph is filtered to the edges flagged in ``g.ep.include`` and
    searched twice with the same visitor: once in the first direction and
    once in the opposite one. The edge filter and the reversed state that
    are set on ``g`` are always undone before returning, even when one of
    the searches raises.

    Parameters
    ----------
    g : the graph to explore
    edge : the starting edge, normally this is the *solution edge*
    delta : signed change in absolute value (eg. tons) on the implementation
        flow (delta). For example -26.0 (tons)
    upstream : The direction of traversal. When upstream is True, the graph
        is explored upstream first, otherwise downstream first.

    Returns
    -------
    Edge PropertyMap (float)
        The signed change on the edges
    """
    amount = g.ep.amount
    # Property map for keeping track of the visited edge. Once an edge has
    # been visited it won't be processed anymore.
    visited = g.new_edge_property("bool", val=False)
    change = g.new_edge_property("float", val=0.0)
    change[edge] = delta
    visited[edge] = True

    # We are only interested in the edges that define the solution
    g.set_edge_filter(g.ep.include)
    try:
        # By default we go upstream first, because 'demand dictates supply'
        if upstream:
            g.set_reversed(True)
            balance_factor = 1 / g.vp.downstream_balance_factor.a
            node = edge.target()
        else:
            g.set_reversed(False)
            balance_factor = g.vp.downstream_balance_factor.a
            node = edge.source()

        node_visitor = NodeVisitor(g.vp["id"], amount, visited, change,
                                   balance_factor)
        search.bfs_search(g, node, node_visitor)

        # now go downstream, if we started upstream
        # (or upstream, if we started downstream)
        g.set_reversed(not g.is_reversed())
        node = edge.target() if g.is_reversed() else edge.source()
        # reverse the balancing factors
        node_visitor.balance_factor = 1 / node_visitor.balance_factor
        search.bfs_search(g, node, node_visitor)
    finally:
        # Always hand the graph back unreversed and unfiltered; without this
        # a failing search would leave the caller's graph in a modified state.
        g.set_reversed(False)
        g.clear_filters()

    return node_visitor.change
def generate(self):
    """Run a BFS over the skeleton graph with a RandomRadiusGenerator visitor.

    The visitor is built from ``self.skeleton``, ``self.source``,
    ``self.min_radius`` and ``self.max_radius``; the search starts at
    ``self.source``.

    Returns the visitor's ``radius_vp`` attribute (presumably a vertex
    property map of radii -- confirm against RandomRadiusGenerator).
    """
    if self.verbose:
        print("Generate neuron radii...")

    radius_visitor = RandomRadiusGenerator(
        self.skeleton, self.source, self.min_radius, self.max_radius)
    skeleton_graph = self.skeleton.get_graph()
    bfs_search(skeleton_graph, self.source, radius_visitor)
    return radius_visitor.radius_vp
def remove_redundant_edges_by_bfs(g, root):
    """For an undirected graph, filter out the edges not collected by BFS.

    A BFS is run from `root` with an EdgeCollectorVisitor; only the edges the
    visitor recorded in its ``edges`` attribute stay visible. The filter is
    installed on `g` itself (in place) and `g` is returned for convenience.
    """
    collector = EdgeCollectorVisitor()
    bfs_search(g, source=root, visitor=collector)

    keep = g.new_edge_property('bool')
    keep.a = False
    for src, dst in collector.edges:
        try:
            keep[g.edge(src, dst)] = True
        except ValueError:
            # fall back to the opposite orientation of the same vertex pair
            keep[g.edge(dst, src)] = True

    g.set_edge_filter(keep)
    return g
def fill_missing_time(g, t, root, obs_nodes, infection_times, debug=False):
    """Interpolate infection times for the unobserved nodes of tree `t`.

    For every node of `t` that is not in `obs_nodes`, an "ancestor" and a
    "descendant" are looked up (``td_vis.pred`` filled by a top-down BFS from
    `root`, ``bu_vis.succ`` filled by ``bottom_up_traversal``) and the node's
    time is placed between their times, proportionally to its distance from
    `root`.

    Parameters
    ----------
    g : graph; only ``g.num_vertices()`` is used, to size the visitor arrays
    t : the tree to traverse
    root : root node of `t`, coerced to ``int``
    obs_nodes : iterable of observed node ids
    infection_times : array-like indexed by node id
    debug : when True, print intermediate values and assert that each
        predicted time lies strictly between the two reference times

    Returns
    -------
    numpy.ndarray
        Copy of `infection_times` with the hidden nodes' entries replaced.
        NOTE(review): the copy keeps the input dtype, so an integer input
        truncates the interpolated values -- confirm this is intended.
    """
    root = int(root)  # force type

    # for each node, get its ancestor and descendent
    # by ancestor, it's the closest observed node on the end to the root
    # by descendent, it's the next closest node on the other end to the leaf
    # (``np.int`` was removed in NumPy 1.24; the builtin ``int`` gives the
    # same dtype.)
    td_vis = TopDownVisitor(
        np.full(g.num_vertices(), -1, dtype=int), root, obs_nodes)
    bfs_search(t, source=root, visitor=td_vis)

    bu_vis = BottomUpVisitor(
        np.full(g.num_vertices(), -1, dtype=int), root, obs_nodes)
    bottom_up_traversal(t, vis=bu_vis)

    # infer the time
    hidden_nodes = set(map(int, t.vertices())) - set(obs_nodes)
    assert (root not in hidden_nodes), 'root is hidden'

    pred_infection_times = np.array(infection_times)
    dist = shortest_distance(t, source=root)

    for v in hidden_nodes:
        ans, des = td_vis.pred[v], bu_vis.succ[v]
        assert (ans != -1 and des != -1), \
            '{}, {}'.format(
                v, (t.vertex(v).in_degree(), t.vertex(v).out_degree()))  # 1, 0, v=leave
        if debug:
            print(v, ans, des)

        # linear interpolation along the distance from the root
        denum = dist[des] - dist[ans]
        numer = dist[v] - dist[ans]
        pred_infection_times[v] = (
            infection_times[ans] +
            abs(numer / denum * (infection_times[des] - infection_times[ans])))

        if debug:
            assert pred_infection_times[v] > infection_times[ans]
            assert pred_infection_times[v] < infection_times[des]
            print('t(ans), t(des): {}, {}'.format(infection_times[ans],
                                                  infection_times[des]))
            print('numer {}'.format(numer))
            print('denum {}'.format(denum))
            print('pred time {}'.format(pred_infection_times[v]))

    return pred_infection_times
def edges_to_directed_tree(g, root, edges):
    """Build a directed graph from `edges`, oriented by a BFS from `root`.

    An undirected scratch graph with as many vertices as `g` is filled with
    `edges` and searched from `root` with an EdgeCollectorVisitor. The graph
    is then emptied, switched to directed, and refilled with the edges in the
    order and orientation the visitor recorded them. The result is passed
    through ``filter_nodes_by_edges(tree, edges)`` before being returned.
    """
    tree = Graph(directed=False)
    tree.add_vertex(g.num_vertices())
    for src, dst in edges:
        tree.add_edge(src, dst)

    collector = EdgeCollectorVisitor()
    bfs_search(tree, source=root, visitor=collector)

    # rebuild the same vertex set with the BFS-oriented edges only
    tree.clear_edges()
    tree.set_directed(True)
    for parent, child in collector.edges:
        tree.add_edge(parent, child)

    return filter_nodes_by_edges(tree, edges)
def build_closure(g, terminals, debug=False, verbose=False):
    """Build the closure graph on `terminals`.

    A BFS is run from every terminal. For each other terminal whose recorded
    distance is not -1, a weighted edge (root, terminal, distance) is
    collected. The edges go into a new undirected graph that has as many
    vertices as `g` but is vertex-filtered down to the terminals.

    Parameters
    ----------
    g : the input graph
    terminals : iterable of terminal node ids
    debug : print the current root and the newly found edges
    verbose : accepted but not used by this function

    Returns
    -------
    (gc, eweight, r2pred)
        the closure graph, its integer edge-weight property map, and a dict
        mapping each terminal to the ``pred`` attribute of its BFS visitor
    """
    terminals = list(terminals)

    closure = Graph(directed=False)
    closure.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}  # root to predecessor map (from bfs)

    # bfs from each terminal to all other nodes
    for root in terminals:
        if debug:
            print('root {}'.format(root))
        visitor = init_visitor(g, root)
        bfs_search(g, source=root, visitor=visitor)

        found = {
            (root, other, visitor.dist[other])
            for other in terminals
            if visitor.dist[other] != -1 and other != root
        }
        if debug:
            print('new edges {}'.format(found))
        weighted_edges |= found
        r2pred[root] = visitor.pred

    # add the edges and remember the weights in the same iteration order
    costs = []
    for src, dst, cost in weighted_edges:
        closure.add_edge(src, dst)
        costs.append(cost)

    eweight = closure.new_edge_property('int')
    eweight.set_2d_array(np.array(costs))

    # only the terminals stay visible
    terminal_mask = closure.new_vertex_property('bool')
    terminal_mask.a = False
    for node in terminals:
        terminal_mask[node] = True
    closure.set_vertex_filter(terminal_mask)

    return closure, eweight, r2pred
def observe_cascade(c, source, q, method='uniform', tree=None,
                    source_includable=False):
    """Given a cascade `c` and `source`, return the observed nodes.

    Parameters
    ----------
    c : numpy array of infection times indexed by node; -1 marks a node that
        was not infected
    source : the cascade source node
    q : fraction of the infected nodes to observe
    method : how the observed nodes are picked
        - 'uniform': random sample of the infected nodes
        - 'late': the nodes with the largest values in `c`
        - 'leaves': the first nodes returned by ``reverse_bfs(tree)``
        - 'bfs-head' / 'bfs-tail': first / last nodes in BFS order from
          `source` on the undirected view of `tree`
    tree : the cascade tree, required by 'leaves', 'bfs-head' and 'bfs-tail'
    source_includable : when False, `source` is removed from the candidates
        before the number of observations is computed

    Returns
    -------
    A sequence holding ``max(ceil(#candidates * q), 2)`` nodes, or fewer when
    the chosen ordering has fewer entries.

    Raises
    ------
    ValueError
        if `method` is not one of the names above
    AssertionError
        if `tree` is required by `method` but is None
    """
    all_infection = np.nonzero(c != -1)[0]
    if not source_includable:
        all_infection = list(set(all_infection) - {source})

    # always observe at least two nodes
    num_obs = max(int(math.ceil(len(all_infection) * q)), 2)

    if method == 'uniform':
        return np.random.permutation(all_infection)[:num_obs]

    if method == 'late':
        return np.argsort(c)[-num_obs:]

    if method == 'leaves':
        assert tree is not None, 'to get the leaves, the cascade tree is required'
        return reverse_bfs(tree)[:num_obs]

    if method in ('bfs-head', 'bfs-tail'):
        assert tree is not None, 'the cascade tree is required'
        vis = BFSNodeCollector()
        bfs_search(GraphView(tree, directed=False), source, vis)
        if method == 'bfs-head':
            return vis.nodes_in_order[:num_obs]  # head
        return vis.nodes_in_order[-num_obs:]  # tail

    raise ValueError('unknown method {}'.format(method))
def fill_missing_time(g, t, root, obs_nodes, infection_times, debug=False):
    """Interpolate infection times for the unobserved nodes of tree `t`.

    For every node of `t` that is not in `obs_nodes`, two reference nodes are
    looked up (``td_vis.pred`` filled by a top-down BFS from `root`,
    ``bu_vis.pred`` filled by ``bottom_up_traversal``) and the node's time is
    placed between their times, proportionally to its distance from `root`.

    Parameters
    ----------
    g : graph; only ``g.num_vertices()`` is used, to size the visitor arrays
    t : the tree to traverse
    root : root node of `t`
    obs_nodes : iterable of observed node ids
    infection_times : array-like indexed by node id
    debug : when True, print intermediate values and assert that each
        predicted time lies strictly between the two reference times

    Returns
    -------
    numpy.ndarray
        Copy of `infection_times` with the hidden nodes' entries replaced.
        NOTE(review): the copy keeps the input dtype, so an integer input
        truncates the interpolated values -- confirm this is intended.
    """
    # get ancestor and descendent
    # (``np.int`` was removed in NumPy 1.24; the builtin ``int`` gives the
    # same dtype.)
    td_vis = TopDownVisitor(np.full(g.num_vertices(), -1, dtype=int),
                            root, obs_nodes)
    bfs_search(t, source=root, visitor=td_vis)

    bu_vis = BottomUpVisitor(np.full(g.num_vertices(), -1, dtype=int),
                             root, obs_nodes)
    bottom_up_traversal(t, vis=bu_vis)

    # infer the time
    hidden_nodes = set(map(int, t.vertices())) - set(obs_nodes)
    assert (root not in hidden_nodes), 'root is hidden'

    pred_infection_times = np.array(infection_times)
    dist = shortest_distance(t, source=root)

    for v in hidden_nodes:
        ans, des = td_vis.pred[v], bu_vis.pred[v]
        assert ans != -1
        assert des != -1, \
            '{}, {}'.format(
                v, (t.vertex(v).in_degree(), t.vertex(v).out_degree()))  # 1, 0, v=leave
        if debug:
            print(v, ans, des)

        # linear interpolation along the distance from the root
        denum = dist[des] - dist[ans]
        numer = dist[v] - dist[ans]
        pred_infection_times[v] = (
            infection_times[ans] +
            abs(numer / denum * (infection_times[des] - infection_times[ans])))

        if debug:
            assert pred_infection_times[v] > infection_times[ans]
            assert pred_infection_times[v] < infection_times[des]
            print('t(ans), t(des): {}, {}'.format(infection_times[ans],
                                                  infection_times[des]))
            print('numer {}'.format(numer))
            print('denum {}'.format(denum))
            print('pred time {}'.format(pred_infection_times[v]))

    return pred_infection_times
def traverse_graph(g, edge, delta, upstream=True):
    """Traverse the graph in a breadth-first-search manner

    The graph is filtered to the edges flagged in ``g.ep.include`` and then
    searched repeatedly: one pass in the first direction, one in the opposite
    direction, and one "balancing" pass with a second visitor. The changes of
    every round are added to a running total. Rounds are repeated until the
    remaining delta is below 0.00001 in absolute value or MAX_ITERATIONS
    rounds have run.

    Parameters
    ----------
    g : the graph to explore
    edge : the starting edge, normally this is the *solution edge*
    delta : signed change in absolute value (eg. tons) on the implementation
        flow (delta). For example -26.0 (tons)
    upstream : The direction of traversal. When upstream is True, the graph
        is explored upstream first, otherwise downstream first.

    Returns
    -------
    Edge PropertyMap (float)
        The accumulated signed change on the edges
    """
    # Debugging switch: when set to True, intermediate results are written to
    # image files via the project's test helpers.
    plot = False

    amount = g.ep.amount
    # `change` holds the changes of the current round, `total_change` the sum
    # over all rounds.
    change = g.new_edge_property("float", val=0.0)
    total_change = g.new_edge_property("float", val=0.0)

    if plot:
        # prepare plotting of intermediate results
        from repair.apps.asmfa.tests import flowmodeltestdata
        flowmodeltestdata.plot_materials(g, file='materials.png')
        flowmodeltestdata.plot_amounts(g, 'amounts.png', 'amount')
        g.ep.change = change

    # We are only interested in the edges that define the solution
    g.set_edge_filter(g.ep.include)
    MAX_ITERATIONS = 20
    balance_factor = g.vp.downstream_balance_factor.a

    # make a first run with the given changes to the implementation edge

    # By default we go upstream first, because 'demand dictates supply'
    if upstream:
        node = edge.source()
        g.set_reversed(True)
        # going against the flow direction uses the inverse factors
        balance_factor = 1 / balance_factor
    else:
        node = edge.target()
        g.set_reversed(False)

    # initialize the node-visitors
    # (both share the same `change` map, so each search sees what the
    # previous one wrote)
    node_visitor = NodeVisitor(g.vp["id"], amount, change,
                               balance_factor)
    node_visitor2 = NodeVisitorBalanceDeltas(g.vp["id"], amount, change,
                                             balance_factor)

    node_visitor.forward = True
    total_change.a[:] = 0
    new_delta = delta
    i = 0
    change[edge] = new_delta
    # start in one direction
    search.bfs_search(g, node, node_visitor)
    # re-set the value on the starting edge after the search
    # NOTE(review): presumably because the visitor may overwrite it -- confirm
    change[edge] = new_delta

    if plot:
        # Plot changes after forward run
        g.ep.change.a[:] = change.a
        flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

    # reverse_graph is defined elsewhere; it returns the node the next search
    # starts from.
    # NOTE(review): presumably it also flips the graph direction and the
    # visitors' state -- confirm against its definition
    node = reverse_graph(g, node_visitor, node_visitor2, edge)
    search.bfs_search(g, node, node_visitor)
    change[edge] = new_delta

    if plot:
        # Plot changes after backward run
        g.ep.change.a[:] = change.a
        flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

    # balance out the changes
    search.bfs_search(g, node, node_visitor2)
    change[edge] = new_delta

    # add up the total changes
    total_change.a += change.a

    if plot:
        # Plot total changes
        g.ep.change.a[:] = total_change.a
        flowmodeltestdata.plot_amounts(g, f'plastic_deltas_{i}.png', 'change')

    node = reverse_graph(g, node_visitor, node_visitor2, edge)

    # The remaining delta is the requested delta minus the weighted degree
    # (sum of total_change over the node's in- or out-edges); a node without
    # such edges ends the iteration with a remaining delta of 0.
    if upstream:
        if node.in_degree():
            sum_f = node.in_degree(weight=total_change)
            new_delta = delta - sum_f
        else:
            new_delta = 0
    else:
        if node.out_degree():
            sum_f = node.out_degree(weight=total_change)
            new_delta = delta - sum_f
        else:
            new_delta = 0
    i += 1

    # Repeat the three passes with the remaining delta until it is small
    # enough or the iteration limit is reached.
    while i < MAX_ITERATIONS and abs(new_delta) > 0.00001:
        # each round starts from a clean per-round change map
        change.a[:] = 0
        change[edge] = new_delta

        # start in one direction
        search.bfs_search(g, node, node_visitor)
        change[edge] = 0

        if plot:
            # Plot changes after forward run
            g.ep.change.a[:] = change.a
            flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

        # now go downstream, if we started upstream
        # (or upstream, if we started downstream)
        node = reverse_graph(g, node_visitor, node_visitor2, edge)
        # here the delta also accounts for what this round has changed so far
        if upstream:
            sum_f = node.out_degree(weight=total_change) + \
                node.out_degree(weight=change)
        else:
            sum_f = node.in_degree(weight=total_change) + \
                node.in_degree(weight=change)
        new_delta = delta - sum_f
        change[edge] = new_delta
        search.bfs_search(g, node, node_visitor)

        if plot:
            # Plot changes after backward run
            g.ep.change.a[:] = change.a
            flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

        # balance out the changes
        search.bfs_search(g, node, node_visitor2)
        change[edge] = 0

        if plot:
            # Plot changes after balancing
            g.ep.change.a[:] = change.a
            flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

        # add up the total changes
        total_change.a += change.a

        node = reverse_graph(g, node_visitor, node_visitor2, edge)

        if plot:
            # Plot total changes
            g.ep.change.a[:] = total_change.a
            flowmodeltestdata.plot_amounts(g, f'plastic_deltas_{i}.png', 'change')

        # same remaining-delta computation as after the first round
        if upstream:
            if node.in_degree():
                sum_f = node.in_degree(weight=total_change)
                new_delta = delta - sum_f
            else:
                new_delta = 0
        else:
            if node.out_degree():
                sum_f = node.out_degree(weight=total_change)
                new_delta = delta - sum_f
            else:
                new_delta = 0
        i += 1

    # finally clean up
    # NOTE(review): not reached when a search raises, so the graph then stays
    # filtered and possibly reversed
    g.set_reversed(False)
    g.clear_filters()
    return total_change
def test_BFSNodeCollectorVisitor(g, expected):
    """A BFS from vertex 0 must make BFSNodeCollector record `expected`."""
    collector = BFSNodeCollector()
    bfs_search(g, 0, collector)
    visit_order = collector.nodes_in_order
    assert visit_order == expected
def run(
    input_file: KGTKFiles,
    output_file: KGTKFiles,
    root: typing.Optional[typing.List[str]],
    rootfile,
    rootfilecolumn,
    subject_column_name: typing.Optional[str],
    object_column_name: typing.Optional[str],
    predicate_column_name: typing.Optional[str],
    props: typing.Optional[typing.List[str]],
    props_file: typing.Optional[str],
    propsfilecolumn: typing.Optional[str],
    inverted: bool,
    inverted_props: typing.Optional[typing.List[str]],
    inverted_props_file: typing.Optional[str],
    invertedpropsfilecolumn: typing.Optional[str],
    undirected: bool,
    undirected_props: typing.Optional[typing.List[str]],
    undirected_props_file: typing.Optional[str],
    undirectedpropsfilecolumn: typing.Optional[str],
    label: str,
    selflink_bool: bool,
    show_properties: bool,
    breadth_first: bool,
    depth_limit: typing.Optional[int],
    errors_to_stdout: bool,
    errors_to_stderr: bool,
    show_options: bool,
    verbose: bool,
    very_verbose: bool,
    **kwargs,  # Whatever KgtkFileOptions and KgtkValueOptions want.
):
    """Write one edge from each root node to every node reachable from it.

    The input KGTK file is loaded into a graph-tool graph. For every root
    node found in that graph, the graph is traversed (depth-first by default,
    breadth-first on request, optionally depth-limited) and a row
    ``(root, label, reached node)`` is written for every edge the traversal
    yields.

    Root nodes come from `root` (comma-separated groups) and/or one column of
    `rootfile`. The property sets passed to the graph loader (`props`,
    `undirected_props`, `inverted_props`) are assembled the same way from
    their command-line lists and files.

    Raises
    ------
    KGTKException
        on conflicting options (``--inverted`` with inverted props,
        ``--undirected`` with undirected props, ``--depth-limit`` without
        ``--breadth-first`` or not positive), on unknown columns, and when
        opening a reader or the writer exits.
    """
    import sys
    from pathlib import Path

    from graph_tool.search import dfs_iterator, bfs_iterator, bfs_search, BFSVisitor

    from kgtk.exceptions import KGTKException
    from kgtk.cli_argparse import KGTKArgumentParser
    from kgtk.gt.gt_load import load_graph_from_kgtk
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    input_kgtk_file: Path = KGTKArgumentParser.get_input_file(input_file)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    # Build the option structures.
    input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="input", fallback=True)
    root_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="root", fallback=True)
    props_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="props", fallback=True)
    undirected_props_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="undirected_props", fallback=True)
    inverted_props_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="inverted_props", fallback=True)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    if root is None:
        root = []  # This simplifies matters.

    if props is None:
        props = []  # This simplifies matters.

    if undirected_props is None:
        undirected_props = []  # This simplifies matters.

    if inverted_props is None:
        inverted_props = []  # This simplifies matters.

    def read_column_values(file_name, column, who, reader_options,
                           file_label, column_label) -> typing.Set[str]:
        """Return the set of values in one column of a KGTK file.

        `column` may be a column number (digits), a column name, or None.
        For edge files the lookup goes through the node1 column index, for
        node files through the id column index, otherwise through the
        reader's column name map. `file_label` and `column_label` are only
        used to word the messages.
        """
        if verbose:
            print("Reading the %s file %s" % (column_label, repr(file_name)),
                  file=error_file,
                  flush=True)
        try:
            reader: KgtkReader = KgtkReader.open(
                Path(file_name),
                error_file=error_file,
                who=who,
                options=reader_options,
                value_options=value_options,
                verbose=verbose,
                very_verbose=very_verbose,
            )
        except SystemExit:
            raise KGTKException("Exiting.")

        try:
            col: int
            if column is not None and column.isdigit():
                col = int(column)
            elif reader.is_edge_file:
                col = reader.get_node1_column_index(column)
            elif reader.is_node_file:
                col = reader.get_id_column_index(column)
            elif column is not None:
                col = reader.column_name_map.get(column, -1)
            else:
                raise KGTKException(
                    "The %s file is neither an edge nor a node file and the %s column name was not supplied."
                    % (file_label, file_label))

            if col < 0:
                raise KGTKException("Unknown %s column %s" %
                                    (column_label, repr(column)))

            return {row[col] for row in reader}
        finally:
            # close on every path, including a failure while reading rows
            reader.close()

    if show_options:
        if root is not None:
            print("--root %s" % " ".join(root), file=error_file)
        if rootfile is not None:
            print("--rootfile=%s" % rootfile, file=error_file)
        if rootfilecolumn is not None:
            print("--rootfilecolumn=%s" % rootfilecolumn, file=error_file)
        if subject_column_name is not None:
            print("--subj=%s" % subject_column_name, file=error_file)
        if object_column_name is not None:
            print("--obj=%s" % object_column_name, file=error_file)
        if predicate_column_name is not None:
            print("--pred=%s" % predicate_column_name, file=error_file)

        if props is not None:
            print("--props=%s" % " ".join(props), file=error_file)
        if props_file is not None:
            print("--props-file=%s" % props_file, file=error_file)
        if propsfilecolumn is not None:
            print("--propsfilecolumn=%s" % propsfilecolumn, file=error_file)

        print("--inverted=%s" % str(inverted), file=error_file)
        if inverted_props is not None:
            print("--inverted-props=%s" % " ".join(inverted_props),
                  file=error_file)
        if inverted_props_file is not None:
            print("--inverted-props-file=%s" % inverted_props_file,
                  file=error_file)
        if invertedpropsfilecolumn is not None:
            print("--invertedpropsfilecolumn=%s" % invertedpropsfilecolumn,
                  file=error_file)

        print("--undirected=%s" % str(undirected), file=error_file)
        if undirected_props is not None:
            print("--undirected-props=%s" % " ".join(undirected_props),
                  file=error_file)
        if undirected_props_file is not None:
            print("--undirected-props-file=%s" % undirected_props_file,
                  file=error_file)
        if undirectedpropsfilecolumn is not None:
            print("--undirectedpropsfilecolumn=%s" % undirectedpropsfilecolumn,
                  file=error_file)

        print("--label=%s" % label, file=error_file)
        print("--selflink=%s" % str(selflink_bool), file=error_file)
        print("--breadth-first=%s" % str(breadth_first), file=error_file)
        if depth_limit is not None:
            print("--depth-limit=%d" % depth_limit, file=error_file)
        input_reader_options.show(out=error_file)
        root_reader_options.show(out=error_file)
        props_reader_options.show(out=error_file)
        undirected_props_reader_options.show(out=error_file)
        inverted_props_reader_options.show(out=error_file)
        value_options.show(out=error_file)
        KgtkReader.show_debug_arguments(errors_to_stdout=errors_to_stdout,
                                        errors_to_stderr=errors_to_stderr,
                                        show_options=show_options,
                                        verbose=verbose,
                                        very_verbose=very_verbose,
                                        out=error_file)
        print("=======", file=error_file, flush=True)

    if inverted and (len(inverted_props) > 0
                     or inverted_props_file is not None):
        raise KGTKException(
            "--inverted is not allowed with --inverted-props or --inverted-props-file"
        )

    if undirected and (len(undirected_props) > 0
                       or undirected_props_file is not None):
        raise KGTKException(
            "--undirected is not allowed with --undirected-props or --undirected-props-file"
        )

    if depth_limit is not None:
        if not breadth_first:
            raise KGTKException(
                "--depth-limit is not allowed without --breadth-first")
        if depth_limit <= 0:
            raise KGTKException("--depth-limit requires a positive argument")

    # ---- root nodes -------------------------------------------------------
    root_set: typing.Set = set()

    if rootfile is not None:
        root_set |= read_column_values(rootfile, rootfilecolumn, "root",
                                       root_reader_options, "root", "root")

    if len(root) > 0:
        if verbose:
            print("Adding root nodes from the command line.",
                  file=error_file,
                  flush=True)
        root_group: str
        for root_group in root:
            r: str
            for r in root_group.split(','):
                if verbose:
                    print("... adding %s" % repr(r),
                          file=error_file,
                          flush=True)
                root_set.add(r)

    if len(root_set) == 0:
        print(
            "Warning: No nodes in the root set, the output file will be empty.",
            file=error_file,
            flush=True)
    elif verbose:
        print("%d nodes in the root set." % len(root_set),
              file=error_file,
              flush=True)

    # ---- properties to follow ---------------------------------------------
    # The graph is limited to edges where the predicate (label) column
    # contains one of the selected properties.
    property_set: typing.Set[str] = set()
    if props_file is not None:
        property_set |= read_column_values(props_file, propsfilecolumn,
                                           "props", props_reader_options,
                                           "props", "props")
    property_set.update(prop for prop_group in props
                        for prop in prop_group.split(','))
    if verbose and len(property_set) > 0:
        print("property set=%s" % " ".join(sorted(property_set)),
              file=error_file,
              flush=True)

    # ---- properties treated as undirected links ---------------------------
    undirected_property_set: typing.Set[str] = set()
    if undirected_props_file is not None:
        undirected_property_set |= read_column_values(
            undirected_props_file, undirectedpropsfilecolumn,
            "undirected_props", undirected_props_reader_options,
            "undirected props", "undirected properties")
    undirected_property_set.update(
        und_prop for und_prop_group in undirected_props
        for und_prop in und_prop_group.split(','))
    if verbose and len(undirected_property_set) > 0:
        print("undirected property set=%s" %
              " ".join(sorted(undirected_property_set)),
              file=error_file,
              flush=True)

    # ---- properties whose source and target are swapped -------------------
    inverted_property_set: typing.Set[str] = set()
    if inverted_props_file is not None:
        inverted_property_set |= read_column_values(
            inverted_props_file, invertedpropsfilecolumn, "inverted_props",
            inverted_props_reader_options, "inverted props",
            "inverted properties")
    inverted_property_set.update(
        inv_prop for inv_prop_group in inverted_props
        for inv_prop in inv_prop_group.split(','))
    if verbose and len(inverted_property_set):
        print("inverted property set=%s" %
              " ".join(sorted(inverted_property_set)),
              file=error_file,
              flush=True)

    # ---- input graph ------------------------------------------------------
    try:
        kr: KgtkReader = KgtkReader.open(
            input_kgtk_file,
            error_file=error_file,
            who="input",
            options=input_reader_options,
            value_options=value_options,
            verbose=verbose,
            very_verbose=very_verbose,
        )
    except SystemExit:
        raise KGTKException("Exiting.")

    sub: int = kr.get_node1_column_index(subject_column_name)
    if sub < 0:
        print("Unknown subject column %s" % repr(subject_column_name),
              file=error_file,
              flush=True)

    pred: int = kr.get_label_column_index(predicate_column_name)
    if pred < 0:
        print("Unknown predicate column %s" % repr(predicate_column_name),
              file=error_file,
              flush=True)

    obj: int = kr.get_node2_column_index(object_column_name)
    if obj < 0:
        print("Unknown object column %s" % repr(object_column_name),
              file=error_file,
              flush=True)

    if sub < 0 or pred < 0 or obj < 0:
        kr.close()
        raise KGTKException("Exiting due to unknown column.")

    if verbose:
        print("special columns: sub=%d pred=%d obj=%d" % (sub, pred, obj),
              file=error_file,
              flush=True)

    try:
        G = load_graph_from_kgtk(kr,
                                 directed=not undirected,
                                 inverted=inverted,
                                 ecols=(sub, obj),
                                 pcol=pred,
                                 pset=property_set,
                                 upset=undirected_property_set,
                                 ipset=inverted_property_set,
                                 verbose=verbose,
                                 out=error_file)

        # Get the vertex name property map (vertex to node1 (subject) name)
        name = G.vp["name"]

        if show_properties:
            print("Graph name=%s" % repr(name), file=error_file, flush=True)
            print("Graph properties:", file=error_file, flush=True)
            key: typing.Any
            for key in G.properties:
                print("    %s: %s" % (repr(key), repr(G.properties[key])),
                      file=error_file,
                      flush=True)

        index_list = [v for v in G.vertices() if name[v] in root_set]
        if len(index_list) == 0:
            print(
                "Warning: No root nodes found in the graph, the output file will be empty.",
                file=error_file,
                flush=True)
        elif verbose:
            print("%d root nodes found in the graph." % len(index_list),
                  file=error_file,
                  flush=True)

        output_header: typing.List[str] = ['node1', 'label', 'node2']
        try:
            kw: KgtkWriter = KgtkWriter.open(output_header,
                                             output_kgtk_file,
                                             mode=KgtkWriter.Mode.EDGE,
                                             require_all_columns=True,
                                             prohibit_extra_columns=True,
                                             fill_missing_columns=False,
                                             verbose=verbose,
                                             very_verbose=very_verbose)
        except SystemExit:
            raise KGTKException("Exiting.")

        class DepthExceeded(Exception):
            """Raised by the visitor to stop a BFS beyond the depth limit."""

        class DepthLimitedVisitor(BFSVisitor):
            """Write a row per tree edge until `depth_limit` is exceeded."""

            def __init__(self, root_name, dist):
                self.root_name = root_name
                self.dist = dist  # vertex -> number of hops from the root

            def tree_edge(self, e):
                newdist = self.dist[e.source()] + 1
                if newdist > depth_limit:
                    raise DepthExceeded
                self.dist[e.target()] = newdist
                kw.writerow([self.root_name, label, name[e.target()]])

        try:
            for index in index_list:
                root_name = name[index]
                if selflink_bool:
                    kw.writerow([root_name, label, root_name])

                if not breadth_first:
                    for e in dfs_iterator(G, G.vertex(index)):
                        kw.writerow([root_name, label, name[e.target()]])
                elif depth_limit is None:
                    for e in bfs_iterator(G, G.vertex(index)):
                        kw.writerow([root_name, label, name[e.target()]])
                else:
                    # fresh distance map for every root
                    dist = G.new_vertex_property("int")
                    try:
                        bfs_search(G, G.vertex(index),
                                   DepthLimitedVisitor(root_name, dist))
                    except DepthExceeded:
                        pass
        finally:
            kw.close()
    finally:
        kr.close()
def extract_steiner_tree(sp_tree, terminals, return_nodes=True):
    """Given a spanning tree and terminal nodes, extract the subtree that
    spans the terminals.

    Args:
    ------------
    sp_tree: spanning tree
    terminals: collection of integers (copied, the argument is not consumed)
    return_nodes: bool, return set<int> if True, GraphView otherwise

    Return:
    -----------
    GraphView | set<int>: the steiner tree or the set of its nodes

    algorithm idea:

    1. BFS from one terminal `s`, recording for every reached node its
       predecessor and the tree edge leading to it
    2. walk back from each terminal towards `s` along the predecessors and
       collect the edges
      - the walk stops as soon as it reaches a node that was already
        walked through (its edges are collected already)

    running time: O(E)
    """
    pending = copy(terminals)  # iterative use of obs
    if not isinstance(pending, list):
        pending = list(set(pending))
    assert len(pending) > 0

    class Visitor(BFSVisitor):
        """record the predecessor"""

        def __init__(self, pred):
            self.pred = pred

        def tree_edge(self, e):
            # store (source, edge) together,
            # because getting edge is expensive in graph_tool
            self.pred[int(e.target())] = (int(e.source()), e)

    # predecessor map, int -> (int, edge); (-1, None) means "no predecessor"
    vis = Visitor(dict.fromkeys(extract_nodes(sp_tree), (-1, None)))
    visited = dict.fromkeys(extract_nodes(sp_tree), False)

    st_edges = set()
    nodes_visited = set()

    s = pending[0]
    bfs_search(sp_tree, source=s, visitor=vis)

    while pending:
        node = pending.pop()
        nodes_visited.add(node)
        if visited[node]:
            continue
        visited[node] = True

        # collect the edges on the way from this terminal back to s
        parent, edge = vis.pred[node]
        # 0 can be a node, so the test must be `>= 0`, not truthiness
        while parent >= 0:
            nodes_visited.add(parent)
            st_edges.add(edge)
            if visited[parent]:
                break
            visited[parent] = True
            parent, edge = vis.pred[parent]

    if return_nodes:
        return nodes_visited

    vfilt = sp_tree.new_vertex_property('bool')
    vfilt.a = False
    for v, flag in visited.items():
        if flag:
            vfilt.a[v] = True

    efilt = sp_tree.new_edge_property('bool')
    efilt.a = False
    for kept_edge in st_edges:
        efilt[kept_edge] = True

    return GraphView(sp_tree, vfilt=vfilt, efilt=efilt)
def reverse_bfs(topdown_tree, verbose=False):
    """BFS starting from the leaves and moving towards the root.

    The tree's edges come out from the root (top-down). A forward BFS from
    the root on the undirected view records every node's parent; the nodes
    are then listed starting with the leaves, each followed (in queue order)
    by its not-yet-listed parent.

    Parameters
    ----------
    topdown_tree : the tree to traverse
    verbose : print the leaves, the root, the parent map and every step

    Returns
    -------
    list
        the nodes in the order they were visited, leaves first
    """
    from collections import deque

    leaves = get_leaves(topdown_tree, deg='out')
    if verbose:
        print('leaves', leaves)

    if not isinstance(leaves, list):
        leaves = list(set(leaves))
    assert len(leaves) > 0

    class Visitor(BFSVisitor):
        """record the predecessor"""

        def __init__(self, pred):
            self.pred = pred

        def tree_edge(self, e):
            self.pred[int(e.target())] = int(e.source())

    # map from child to parent; -1 means "no parent recorded"
    vis = Visitor(dict.fromkeys(extract_nodes(topdown_tree), -1))

    visited = dict.fromkeys(extract_nodes(topdown_tree), False)
    nodes_visited = list(leaves)
    for v in nodes_visited:
        visited[v] = True

    s = get_root(topdown_tree, tree_type='topdown')
    if verbose:
        print('root', s)

    bfs_search(GraphView(topdown_tree, directed=False), source=s, visitor=vis)
    if verbose:
        print('vis.pred', vis.pred)

    # deque gives O(1) pops from the front; list.pop(0) is O(n) per call
    queue = deque(leaves)
    while queue:
        x = queue.popleft()
        if verbose:
            print('visiting ', x)

        y = vis.pred[x]
        if verbose:
            print('visiting y', y)

        if y >= 0 and not visited[y]:  # has a parent not listed yet
            if verbose:
                print('not visited')
            nodes_visited.append(y)
            visited[y] = True
            queue.append(y)

    return nodes_visited
def bfs_one_source(g, source):
    """Run a BFS from `source` and return the resulting distance map.

    A fresh integer vertex property map is handed to a SimpleVisitor, which
    is driven by the search. The returned dict maps each position of the
    map's underlying array to the value stored there (presumably the hop
    distance from `source` -- confirm against SimpleVisitor).
    """
    distances = g.new_vertex_property("int")
    start = g.vertex(source)
    bfs_search(g, start, SimpleVisitor(distances))
    return dict(enumerate(distances.a))