Exemple #1
0
def traverse_graph(g, edge, delta, upstream=True):
    """Traverse the graph in a breadth-first-search manner

    The change ``delta`` is placed on ``edge`` and the graph is searched twice
    with the same visitor: first in the requested direction, then in the
    opposite direction with the balancing factors inverted.

    Parameters
    ----------
    g : the graph to explore
    edge : the starting edge, normally this is the *solution edge*
    delta : signed change in absolute value (eg. tons) on the implementation
            flow. For example -26.0 (tons)
    upstream : The direction of traversal. When upstream is True, the graph
               is explored upstream first, otherwise downstream first.

    Returns
    -------
    Edge PropertyMap (float)
        The signed change on the edges

    Notes
    -----
    While the searches run, the edge filter of ``g`` is set to
    ``g.ep.include`` and the graph may be reversed. Before returning -- and
    also when one of the searches raises -- the graph is set back to
    non-reversed and *all* of its filters are cleared.
    """
    amount = g.ep.amount
    # Property map for keeping track of the visited edges; it is handed to
    # the visitor so that an edge is not processed again once visited.
    visited = g.new_edge_property("bool", val=False)
    change = g.new_edge_property("float", val=0.0)
    change[edge] = delta
    visited[edge] = True

    # We are only interested in the edges that define the solution
    g.set_edge_filter(g.ep.include)
    try:
        # By default we go upstream first, because 'demand dictates supply'
        if upstream:
            g.set_reversed(True)
            balance_factor = 1 / g.vp.downstream_balance_factor.a
            node = edge.target()
        else:
            g.set_reversed(False)
            balance_factor = g.vp.downstream_balance_factor.a
            node = edge.source()

        node_visitor = NodeVisitor(g.vp["id"], amount, visited, change,
                                   balance_factor)
        search.bfs_search(g, node, node_visitor)

        # now go downstream, if we started upstream
        # (or upstream, if we started downstream)
        g.set_reversed(not g.is_reversed())
        node = edge.target() if g.is_reversed() else edge.source()
        # reverse the balancing factors
        node_visitor.balance_factor = 1 / node_visitor.balance_factor
        search.bfs_search(g, node, node_visitor)
    finally:
        # Always hand the graph back unreversed and unfiltered, even if a
        # search failed half-way through.
        g.set_reversed(False)
        g.clear_filters()
    return node_visitor.change
Exemple #2
0
    def generate(self):
        """Run a BFS over the skeleton graph and return the visitor's radii.

        A ``RandomRadiusGenerator`` is built from the skeleton, the source and
        the min/max radius, driven by ``bfs_search`` starting at
        ``self.source``, and its ``radius_vp`` attribute is returned.
        """
        if self.verbose:
            print("Generate neuron radii...")

        radius_visitor = RandomRadiusGenerator(
            self.skeleton,
            self.source,
            self.min_radius,
            self.max_radius,
        )
        skeleton_graph = self.skeleton.get_graph()
        bfs_search(skeleton_graph, self.source, radius_visitor)
        return radius_visitor.radius_vp
def remove_redundant_edges_by_bfs(g, root):
    """Keep only the edges collected by a BFS from `root` (undirected graphs).

    An edge filter is installed on `g` that is True exactly for the edges
    reported by the ``EdgeCollectorVisitor``; every other edge is hidden.
    The same graph object is returned.
    """
    collector = EdgeCollectorVisitor()
    bfs_search(g, source=root, visitor=collector)

    # start with every edge masked out, then switch the collected ones on
    keep_edge = g.new_edge_property('bool')
    keep_edge.a = False

    for src, dst in collector.edges:
        try:
            keep_edge[g.edge(src, dst)] = True
        except ValueError:
            # retry with the endpoints swapped
            keep_edge[g.edge(dst, src)] = True

    g.set_edge_filter(keep_edge)
    return g
Exemple #4
0
def fill_missing_time(g, t, root, obs_nodes, infection_times, debug=False):
    """Interpolate infection times for the nodes of `t` that were not observed.

    Every hidden node ``v`` (a vertex of `t` that is not in `obs_nodes`) is
    assigned

        time[ans] + |(d[v] - d[ans]) / (d[des] - d[ans]) * (time[des] - time[ans])|

    where ``ans = td_vis.pred[v]`` comes from a top-down BFS from `root`,
    ``des = bu_vis.succ[v]`` comes from a bottom-up traversal, and ``d`` is
    ``shortest_distance(t, source=root)``.  Per the original author's note,
    ``ans`` is the closest observed node towards the root and ``des`` the
    closest one towards the leaves.

    Parameters
    ----------
    g : graph whose vertex count sizes the visitor arrays
    t : the tree that is traversed
    root : root of `t`; coerced to ``int``
    obs_nodes : iterable of observed node ids
    infection_times : per-node infection times, indexable by node id
    debug : when True, print intermediate values and assert that each
        estimate lies strictly between the two reference times

    Returns
    -------
    numpy.ndarray
        A copy of `infection_times` with the hidden nodes' entries replaced.

    Raises
    ------
    AssertionError
        If `root` is hidden, or a hidden node has no ancestor/descendant
        recorded by the visitors.
    """
    root = int(root)  # force type

    # for each node,
    # get its ancestor and descendent
    # by ancestor, it's the closest observed node on the end to the root
    # by descendent, it's the next closest node on the other end to the leaf
    # NOTE: the builtin `int` replaces the `np.int` alias, which no longer
    # exists in current NumPy releases.
    td_vis = TopDownVisitor(
        np.full(g.num_vertices(), -1, dtype=int), root, obs_nodes)
    bfs_search(t, source=root, visitor=td_vis)

    bu_vis = BottomUpVisitor(
        np.full(g.num_vertices(), -1, dtype=int), root, obs_nodes)
    bottom_up_traversal(t, vis=bu_vis)

    # infer the time
    hidden_nodes = set(map(int, t.vertices())) - set(obs_nodes)
    assert (root not in hidden_nodes), 'root is hidden'

    # NOTE(review): the result inherits the dtype of `infection_times`; with
    # an integer input the interpolated values are truncated -- confirm.
    pred_infection_times = np.array(infection_times)
    dist = shortest_distance(t, source=root)
    for v in hidden_nodes:
        ans, des = td_vis.pred[v], bu_vis.succ[v]
        assert (ans != -1 and des != -1), \
            '{}, {}'.format(v, (t.vertex(v).in_degree(), t.vertex(v).out_degree()))  # 1, 0, v=leave

        if debug:
            print(v, ans, des)

        # linear interpolation along the distance from the root
        denum = dist[des] - dist[ans]
        numer = dist[v] - dist[ans]
        pred_infection_times[v] = (
            infection_times[ans] +
            abs(numer / denum * (infection_times[des] - infection_times[ans])))

        if debug:
            assert pred_infection_times[v] > infection_times[ans]
            assert pred_infection_times[v] < infection_times[des]

            print('t(ans), t(des): {}, {}'.format(infection_times[ans],
                                                  infection_times[des]))
            print('numer {}'.format(numer))
            print('denum {}'.format(denum))
            print('pred time {}'.format(pred_infection_times[v]))

    return pred_infection_times
def edges_to_directed_tree(g, root, edges):
    """Rebuild `edges` as a directed graph using the edge list of a BFS from `root`.

    An undirected scratch graph with as many vertices as `g` is filled with
    `edges` and searched from `root`.  Its edges are then replaced by the
    ones the ``EdgeCollectorVisitor`` recorded (presumably oriented in BFS
    discovery direction -- verify against the visitor), the graph is switched
    to directed, and the result of ``filter_nodes_by_edges`` is returned.
    """
    tree = Graph(directed=False)
    tree.add_vertex(g.num_vertices())
    for a, b in edges:
        tree.add_edge(a, b)

    collector = EdgeCollectorVisitor()
    bfs_search(tree, source=root, visitor=collector)

    # swap the undirected edges for the collected ones, now directed
    tree.clear_edges()
    tree.set_directed(True)
    for a, b in collector.edges:
        tree.add_edge(a, b)

    return filter_nodes_by_edges(tree, edges)
def build_closure(g, terminals, debug=False, verbose=False):
    """build the transitive closure on terminals

    A BFS is run from every terminal.  For each other terminal whose recorded
    distance is not -1, a weighted edge ``(root, terminal, distance)`` is
    collected.  The closure graph has as many vertices as `g`, is undirected,
    and is vertex-filtered down to the terminals.

    Returns a tuple ``(closure_graph, edge_weight_map, r2pred)`` where
    ``r2pred`` maps each terminal to the ``pred`` attribute of its visitor.
    `verbose` is accepted but not used.
    """
    terminals = list(terminals)

    closure = Graph(directed=False)
    closure.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}  # root to predecessor map (from bfs)

    # bfs from every terminal to all other nodes
    for r in terminals:
        if debug:
            print('root {}'.format(r))
        vis = init_visitor(g, r)
        bfs_search(g, source=r, visitor=vis)

        dist = vis.dist
        # edges from this root to every other reachable terminal, with weight
        new_edges = {(r, t, dist[t]) for t in terminals
                     if dist[t] != -1 and t != r}
        if debug:
            print('new edges {}'.format(new_edges))
        weighted_edges |= new_edges
        r2pred[r] = vis.pred

    for u, v, _ in weighted_edges:
        closure.add_edge(u, v)

    # edge weights, in the same iteration order the edges were added in
    eweight = closure.new_edge_property('int')
    eweight.set_2d_array(np.array([w for _, _, w in weighted_edges]))

    # hide every vertex that is not a terminal
    is_terminal = closure.new_vertex_property('bool')
    is_terminal.a = False
    for v in terminals:
        is_terminal[v] = True
    closure.set_vertex_filter(is_terminal)

    return closure, eweight, r2pred
def observe_cascade(c,
                    source,
                    q,
                    method='uniform',
                    tree=None,
                    source_includable=False):
    """
    given a cascade `c` and `source`,
    return a list of observed nodes according to probability `q`

    Parameters
    ----------
    c : array of infection times, indexed by node; -1 marks "not infected"
    source : the source node of the cascade
    q : fraction of the infected nodes to observe; the number of
        observations is ``ceil(len(infected) * q)`` but never less than 2
    method : how the observed nodes are picked
        - 'uniform': random subset of the infected nodes
        - 'late': the indices with the largest values in `c`
          (this branch does not exclude `source`)
        - 'leaves': the first nodes returned by ``reverse_bfs(tree)``
        - 'bfs-head' / 'bfs-tail': the first / last nodes visited by a BFS
          from `source` on the undirected view of `tree`
    tree : the cascade tree; required for 'leaves', 'bfs-head', 'bfs-tail'
    source_includable : when False, `source` is removed from the infected
        nodes before counting and before uniform sampling

    Raises
    ------
    ValueError
        If `method` is not one of the names above.
    AssertionError
        If a tree-based method is requested without `tree`.
    """
    all_infection = np.nonzero(c != -1)[0]
    if not source_includable:
        all_infection = list(set(all_infection) - {source})

    # always report at least two observations
    num_obs = max(int(math.ceil(len(all_infection) * q)), 2)

    if method == 'uniform':
        return np.random.permutation(all_infection)[:num_obs]
    if method == 'late':
        return np.argsort(c)[-num_obs:]
    if method == 'leaves':
        assert tree is not None, 'to get the leaves, the cascade tree is required'
        nodes_in_order = reverse_bfs(tree)
        return nodes_in_order[:num_obs]
    if method in ('bfs-head', 'bfs-tail'):
        assert tree is not None, 'the cascade tree is required'
        vis = BFSNodeCollector()
        bfs_search(GraphView(tree, directed=False), source, vis)
        if method == 'bfs-head':
            return vis.nodes_in_order[:num_obs]  # head
        return vis.nodes_in_order[-num_obs:]  # tail
    raise ValueError('unknown method {}'.format(method))
def fill_missing_time(g, t, root, obs_nodes, infection_times, debug=False):
    """Interpolate infection times for the nodes of `t` that were not observed.

    Every hidden node ``v`` (a vertex of `t` that is not in `obs_nodes`) is
    assigned

        time[ans] + |(d[v] - d[ans]) / (d[des] - d[ans]) * (time[des] - time[ans])|

    where ``ans = td_vis.pred[v]`` comes from a top-down BFS from `root`,
    ``des = bu_vis.pred[v]`` comes from a bottom-up traversal, and ``d`` is
    ``shortest_distance(t, source=root)``.

    Parameters
    ----------
    g : graph whose vertex count sizes the visitor arrays
    t : the tree that is traversed
    root : root of `t`
    obs_nodes : iterable of observed node ids
    infection_times : per-node infection times, indexable by node id
    debug : when True, print intermediate values and assert that each
        estimate lies strictly between the two reference times

    Returns
    -------
    numpy.ndarray
        A copy of `infection_times` with the hidden nodes' entries replaced.

    Raises
    ------
    AssertionError
        If `root` is hidden, or a hidden node has no ancestor/descendant
        recorded by the visitors.
    """
    # get ancestor and descendent
    # NOTE: the builtin `int` replaces the `np.int` alias, which no longer
    # exists in current NumPy releases.
    td_vis = TopDownVisitor(np.full(g.num_vertices(), -1, dtype=int), root, obs_nodes)
    bfs_search(t, source=root, visitor=td_vis)

    bu_vis = BottomUpVisitor(np.full(g.num_vertices(), -1, dtype=int), root, obs_nodes)
    bottom_up_traversal(t, vis=bu_vis)

    # infer the time
    hidden_nodes = set(map(int, t.vertices())) - set(obs_nodes)
    assert (root not in hidden_nodes), 'root is hidden'

    # NOTE(review): the result inherits the dtype of `infection_times`; with
    # an integer input the interpolated values are truncated -- confirm.
    pred_infection_times = np.array(infection_times)
    dist = shortest_distance(t, source=root)
    for v in hidden_nodes:
        ans, des = td_vis.pred[v], bu_vis.pred[v]
        assert ans != -1
        assert des != -1, \
                      '{}, {}'.format(v, (t.vertex(v).in_degree(), t.vertex(v).out_degree()))  # 1, 0, v=leave

        if debug:
            print(v, ans, des)

        # linear interpolation along the distance from the root
        denum = dist[des] - dist[ans]
        numer = dist[v] - dist[ans]
        pred_infection_times[v] = (infection_times[ans] +
                                   abs(numer / denum * (infection_times[des] - infection_times[ans])))

        if debug:
            assert pred_infection_times[v] > infection_times[ans]
            assert pred_infection_times[v] < infection_times[des]

            print('t(ans), t(des): {}, {}'.format(infection_times[ans], infection_times[des]))
            print('numer {}'.format(numer))
            print('denum {}'.format(denum))
            print('pred time {}'.format(pred_infection_times[v]))

    return pred_infection_times
Exemple #9
0
def traverse_graph(g, edge, delta, upstream=True):
    """Traverse the graph in a breadth-first-search manner

    The change ``delta`` is put on ``edge`` and propagated by repeated
    breadth-first searches.  One pass consists of a search in the starting
    direction, a search in the opposite direction and a balancing search
    with a second visitor; the resulting per-edge changes are accumulated
    in a total.  After each pass a residual ``new_delta`` is computed from
    ``delta`` and the accumulated change around the start node, and further
    passes are run until that residual is negligible or ``MAX_ITERATIONS``
    is reached.

    Parameters
    ----------
    g : the graph to explore
    edge : the starting edge, normally this is the *solution edge*
    delta : signed change in absolute value (eg. tons) on the implementation flow (delta). For example -26.0 (tons)
    upstream : The direction of traversal. When upstream is True, the graph
               is explored upstream first, otherwise downstream first.

    Returns
    -------
    Edge PropertyMap (float)
        The signed change on the edges (accumulated over all passes)

    Notes
    -----
    The edge filter of ``g`` is set to ``g.ep.include`` while running; on
    return the graph is set to non-reversed and all of its filters are
    cleared.
    """
    # debug switch: when set to True, intermediate results are written to
    # image files via the test-data plotting helpers imported below
    plot = False

    amount = g.ep.amount
    # `change` holds the changes of the current pass, `total_change` the sum
    # over all passes
    change = g.new_edge_property("float", val=0.0)
    total_change = g.new_edge_property("float", val=0.0)

    if plot:
        # prepare plotting of intermediate results
        from repair.apps.asmfa.tests import flowmodeltestdata
        flowmodeltestdata.plot_materials(g, file='materials.png')
        flowmodeltestdata.plot_amounts(g, 'amounts.png', 'amount')
        g.ep.change = change

    # We are only interested in the edges that define the solution
    g.set_edge_filter(g.ep.include)
    # upper bound on the number of passes (the initial pass counts as one)
    MAX_ITERATIONS = 20
    balance_factor = g.vp.downstream_balance_factor.a

    # make a first run with the given changes to the implementation edge

    # By default we go upstream first, because 'demand dictates supply'
    if upstream:
        node = edge.source()
        g.set_reversed(True)
        # going upstream uses the reciprocal of the downstream factors
        balance_factor = 1 / balance_factor
    else:
        node = edge.target()
        g.set_reversed(False)

    # initialize the node-visitors
    # (both share the same `amount` and `change` maps)
    node_visitor = NodeVisitor(g.vp["id"], amount, change, balance_factor)
    node_visitor2 = NodeVisitorBalanceDeltas(g.vp["id"], amount, change,
                                             balance_factor)

    node_visitor.forward = True
    total_change.a[:] = 0
    new_delta = delta
    i = 0
    change[edge] = new_delta
    # start in one direction
    search.bfs_search(g, node, node_visitor)
    # re-pin the start edge to the requested change after the search
    change[edge] = new_delta

    if plot:
        ## Plot changes after forward run
        g.ep.change.a[:] = change.a
        flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

    # NOTE(review): `reverse_graph` is defined elsewhere; it presumably flips
    # the graph direction and the visitors' state and returns the node to
    # start the next search from -- confirm against its definition.
    node = reverse_graph(g, node_visitor, node_visitor2, edge)
    search.bfs_search(g, node, node_visitor)
    change[edge] = new_delta

    if plot:
        ## Plot changes after backward run
        g.ep.change.a[:] = change.a
        flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

    # balance out the changes
    search.bfs_search(g, node, node_visitor2)
    change[edge] = new_delta

    # add up the total changes
    total_change.a += change.a

    if plot:
        ## Plot total changes
        g.ep.change.a[:] = total_change.a
        flowmodeltestdata.plot_amounts(g, f'plastic_deltas_{i}.png', 'change')

    node = reverse_graph(g, node_visitor, node_visitor2, edge)

    # residual for the next pass: requested delta minus the accumulated
    # change summed over the node's in-edges (upstream) or out-edges
    # (downstream); zero when the node has no such edges
    if upstream:
        if node.in_degree():
            sum_f = node.in_degree(weight=total_change)
            new_delta = delta - sum_f
        else:
            new_delta = 0
    else:
        if node.out_degree():
            sum_f = node.out_degree(weight=total_change)
            new_delta = delta - sum_f
        else:
            new_delta = 0
    i += 1

    # repeat the pass with the residual until it is negligible or the
    # iteration budget is used up
    while i < MAX_ITERATIONS and abs(new_delta) > 0.00001:
        # each pass starts from a clean per-pass change map
        change.a[:] = 0
        change[edge] = new_delta

        # start in one direction

        search.bfs_search(g, node, node_visitor)
        change[edge] = 0

        if plot:
            ## Plot changes after forward run
            g.ep.change.a[:] = change.a
            flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

        # now go downstream, if we started upstream
        # (or upstream, if we started downstream)
        node = reverse_graph(g, node_visitor, node_visitor2, edge)
        # residual for the second half of the pass: accumulated change plus
        # the change of the first half, summed around the new start node
        if upstream:
            sum_f = node.out_degree(weight=total_change) + \
                node.out_degree(weight=change)
        else:
            sum_f = node.in_degree(weight=total_change) + \
                node.in_degree(weight=change)
        new_delta = delta - sum_f
        change[edge] = new_delta
        search.bfs_search(g, node, node_visitor)

        if plot:
            ## Plot changes after backward run
            g.ep.change.a[:] = change.a
            flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

        # balance out the changes
        search.bfs_search(g, node, node_visitor2)
        # the start edge itself contributes nothing to this pass's total
        change[edge] = 0

        if plot:
            ## Plot changes after balancing
            g.ep.change.a[:] = change.a
            flowmodeltestdata.plot_amounts(g, 'plastic_deltas.png', 'change')

        # add up the total changes
        total_change.a += change.a

        node = reverse_graph(g, node_visitor, node_visitor2, edge)

        if plot:
            ## Plot total changes
            g.ep.change.a[:] = total_change.a
            flowmodeltestdata.plot_amounts(g, f'plastic_deltas_{i}.png',
                                           'change')

        # same residual computation as after the initial pass
        if upstream:
            if node.in_degree():
                sum_f = node.in_degree(weight=total_change)
                new_delta = delta - sum_f
            else:
                new_delta = 0
        else:
            if node.out_degree():
                sum_f = node.out_degree(weight=total_change)
                new_delta = delta - sum_f
            else:
                new_delta = 0
        i += 1

    # finally clean up
    g.set_reversed(False)
    g.clear_filters()
    return total_change
Exemple #10
0
def test_BFSNodeCollectorVisitor(g, expected):
    """A BFS from vertex 0 must collect the nodes in the `expected` order."""
    collector = BFSNodeCollector()
    bfs_search(g, source=0, visitor=collector)
    visited_order = collector.nodes_in_order
    assert visited_order == expected
Exemple #11
0
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        root: typing.Optional[typing.List[str]],
        rootfile,
        rootfilecolumn,
        subject_column_name: typing.Optional[str],
        object_column_name: typing.Optional[str],
        predicate_column_name: typing.Optional[str],
        props: typing.Optional[typing.List[str]],
        props_file: typing.Optional[str],
        propsfilecolumn: typing.Optional[str],
        inverted: bool,
        inverted_props: typing.Optional[typing.List[str]],
        inverted_props_file: typing.Optional[str],
        invertedpropsfilecolumn: typing.Optional[str],
        undirected: bool,
        undirected_props: typing.Optional[typing.List[str]],
        undirected_props_file: typing.Optional[str],
        undirectedpropsfilecolumn: typing.Optional[str],
        label: str,
        selflink_bool: bool,
        show_properties: bool,
        breadth_first: bool,
        depth_limit: typing.Optional[int],
        errors_to_stdout: bool,
        errors_to_stderr: bool,
        show_options: bool,
        verbose: bool,
        very_verbose: bool,
        **kwargs,  # Whatever KgtkFileOptions and KgtkValueOptions want.
):
    import sys
    import csv
    from pathlib import Path
    import time
    from graph_tool.search import dfs_iterator, bfs_iterator, bfs_search, BFSVisitor
    # from graph_tool import load_graph_from_csv
    from graph_tool.util import find_edge
    from kgtk.exceptions import KGTKException
    from kgtk.cli_argparse import KGTKArgumentParser

    from kgtk.gt.gt_load import load_graph_from_kgtk
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    #Graph-tool names columns that are not subject or object c0, c1... This function finds the number that graph tool assigned to the predicate column
    def find_pred_position(sub, pred, obj):
        if pred < sub and pred < obj:
            return pred
        elif (pred > sub and pred < obj) or (pred < sub and pred > obj):
            return pred - 1
        else:
            return pred - 2

    def get_edges_by_edge_prop(g, p, v):
        return find_edge(g, prop=g.properties[('e', p)], match=v)

    input_kgtk_file: Path = KGTKArgumentParser.get_input_file(input_file)
    output_kgtk_file: Path = KGTKArgumentParser.get_output_file(output_file)

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr

    # Build the option structures.
    input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="input", fallback=True)
    root_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="root", fallback=True)
    props_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="props", fallback=True)
    undirected_props_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="undirected_props", fallback=True)
    inverted_props_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(
        kwargs, who="inverted_props", fallback=True)
    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    if root is None:
        root = []  # This simplifies matters.

    if props is None:
        props = []  # This simplifies matters.

    if undirected_props is None:
        undirected_props = []  # This simplifies matters.

    if inverted_props is None:
        inverted_props = []  # This simplifies matters.

    if show_options:
        if root is not None:
            print("--root %s" % " ".join(root), file=error_file)
        if rootfile is not None:
            print("--rootfile=%s" % rootfile, file=error_file)
        if rootfilecolumn is not None:
            print("--rootfilecolumn=%s" % rootfilecolumn, file=error_file)
        if subject_column_name is not None:
            print("--subj=%s" % subject_column_name, file=error_file)
        if object_column_name is not None:
            print("--obj=%s" % object_column_name, file=error_file)
        if predicate_column_name is not None:
            print("--pred=%s" % predicate_column_name, file=error_file)

        if props is not None:
            print("--props=%s" % " ".join(props), file=error_file)
        if props_file is not None:
            print("--props-file=%s" % props_file, file=error_file)
        if propsfilecolumn is not None:
            print("--propsfilecolumn=%s" % propsfilecolumn, file=error_file)

        print("--inverted=%s" % str(inverted), file=error_file)
        if inverted_props is not None:
            print("--inverted-props=%s" % " ".join(inverted_props),
                  file=error_file)
        if inverted_props_file is not None:
            print("--inverted-props-file=%s" % inverted_props_file,
                  file=error_file)
        if invertedpropsfilecolumn is not None:
            print("--invertedpropsfilecolumn=%s" % invertedpropsfilecolumn,
                  file=error_file)

        print("--undirected=%s" % str(undirected), file=error_file)
        if undirected_props is not None:
            print("--undirected-props=%s" % " ".join(undirected_props),
                  file=error_file)
        if undirected_props_file is not None:
            print("--undirected-props-file=%s" % undirected_props_file,
                  file=error_file)
        if undirectedpropsfilecolumn is not None:
            print("--undirectedpropsfilecolumn=%s" % undirectedpropsfilecolumn,
                  file=error_file)

        print("--label=%s" % label, file=error_file)
        print("--selflink=%s" % str(selflink_bool), file=error_file)
        print("--breadth-first=%s" % str(breadth_first), file=error_file)
        if depth_limit is not None:
            print("--depth-limit=%d" % depth_limit, file=error_file)
        input_reader_options.show(out=error_file)
        root_reader_options.show(out=error_file)
        props_reader_options.show(out=error_file)
        undirected_props_reader_options.show(out=error_file)
        inverted_props_reader_options.show(out=error_file)
        value_options.show(out=error_file)
        KgtkReader.show_debug_arguments(errors_to_stdout=errors_to_stdout,
                                        errors_to_stderr=errors_to_stderr,
                                        show_options=show_options,
                                        verbose=verbose,
                                        very_verbose=very_verbose,
                                        out=error_file)
        print("=======", file=error_file, flush=True)

    if inverted and (len(inverted_props) > 0
                     or inverted_props_file is not None):
        raise KGTKException(
            "--inverted is not allowed with --inverted-props or --inverted-props-file"
        )

    if undirected and (len(undirected_props) > 0
                       or undirected_props_file is not None):
        raise KGTKException(
            "--undirected is not allowed with --undirected-props or --undirected-props-file"
        )

    if depth_limit is not None:
        if not breadth_first:
            raise KGTKException(
                "--depth-limit is not allowed without --breadth-first")
        if depth_limit <= 0:
            raise KGTKException("--depth-limit requires a positive argument")

    root_set: typing.Set = set()

    if rootfile is not None:
        if verbose:
            print("Reading the root file %s" % repr(rootfile),
                  file=error_file,
                  flush=True)
        try:
            root_kr: KgtkReader = KgtkReader.open(
                Path(rootfile),
                error_file=error_file,
                who="root",
                options=root_reader_options,
                value_options=value_options,
                verbose=verbose,
                very_verbose=very_verbose,
            )
        except SystemExit:
            raise KGTKException("Exiting.")

        rootcol: int
        if root_kr.is_edge_file:
            rootcol = int(
                rootfilecolumn
            ) if rootfilecolumn is not None and rootfilecolumn.isdigit(
            ) else root_kr.get_node1_column_index(rootfilecolumn)
        elif root_kr.is_node_file:
            rootcol = int(
                rootfilecolumn
            ) if rootfilecolumn is not None and rootfilecolumn.isdigit(
            ) else root_kr.get_id_column_index(rootfilecolumn)
        elif rootfilecolumn is not None:
            rootcol = int(
                rootfilecolumn
            ) if rootfilecolumn is not None and rootfilecolumn.isdigit(
            ) else root_kr.column_name_map.get(rootfilecolumn, -1)
        else:
            root_kr.close()
            raise KGTKException(
                "The root file is neither an edge nor a node file and the root column name was not supplied."
            )

        if rootcol < 0:
            root_kr.close()
            raise KGTKException("Unknown root column %s" %
                                repr(rootfilecolumn))

        for row in root_kr:
            rootnode: str = row[rootcol]
            root_set.add(rootnode)
        root_kr.close()

    if len(root) > 0:
        if verbose:
            print("Adding root nodes from the command line.",
                  file=error_file,
                  flush=True)
        root_group: str
        for root_group in root:
            r: str
            for r in root_group.split(','):
                if verbose:
                    print("... adding %s" % repr(r),
                          file=error_file,
                          flush=True)
                root_set.add(r)
    if len(root_set) == 0:
        print(
            "Warning: No nodes in the root set, the output file will be empty.",
            file=error_file,
            flush=True)
    elif verbose:
        print("%d nodes in the root set." % len(root_set),
              file=error_file,
              flush=True)

    property_set: typing.Set[str] = set()
    if props_file is not None:
        if verbose:
            print("Reading the root file %s" % repr(props_file),
                  file=error_file,
                  flush=True)
        try:
            props_kr: KgtkReader = KgtkReader.open(
                Path(props_file),
                error_file=error_file,
                who="props",
                options=props_reader_options,
                value_options=value_options,
                verbose=verbose,
                very_verbose=very_verbose,
            )
        except SystemExit:
            raise KGTKException("Exiting.")

        propscol: int
        if props_kr.is_edge_file:
            propscol = int(
                propsfilecolumn
            ) if propsfilecolumn is not None and propsfilecolumn.isdigit(
            ) else props_kr.get_node1_column_index(propsfilecolumn)
        elif props_kr.is_node_file:
            propscol = int(
                propsfilecolumn
            ) if propsfilecolumn is not None and propsfilecolumn.isdigit(
            ) else props_kr.get_id_column_index(propsfilecolumn)
        elif propsfilecolumn is not None:
            propscol = int(
                propsfilecolumn
            ) if propsfilecolumn is not None and propsfilecolumn.isdigit(
            ) else props_kr.column_name_map.get(propsfilecolumn, -1)
        else:
            props_kr.close()
            raise KGTKException(
                "The props file is neither an edge nor a node file and the root column name was not supplied."
            )

        if propscol < 0:
            props_kr.close()
            raise KGTKException("Unknown props column %s" %
                                repr(propsfilecolumn))

        for row in props_kr:
            property_name: str = row[propscol]
            property_set.add(property_name)
        props_kr.close()

    if len(props) > 0:
        # Filter the graph, G, to include only edges where the predicate (label)
        # column contains one of the selected properties.

        prop_group: str
        for prop_group in props:
            prop: str
            for prop in prop_group.split(','):
                property_set.add(prop)
    if verbose and len(property_set) > 0:
        print("property set=%s" % " ".join(sorted(list(property_set))),
              file=error_file,
              flush=True)

    undirected_property_set: typing.Set[str] = set()
    if undirected_props_file is not None:
        if verbose:
            print("Reading the undirected properties file %s" %
                  repr(undirected_props_file),
                  file=error_file,
                  flush=True)
        try:
            undirected_props_kr: KgtkReader = KgtkReader.open(
                Path(undirected_props_file),
                error_file=error_file,
                who="undirected_props",
                options=undirected_props_reader_options,
                value_options=value_options,
                verbose=verbose,
                very_verbose=very_verbose,
            )
        except SystemExit:
            raise KGTKException("Exiting.")

        undirected_props_col: int
        if undirected_props_kr.is_edge_file:
            undirected_props_col = int(
                undirectedpropsfilecolumn
            ) if undirectedpropsfilecolumn is not None and undirectedpropsfilecolumn.isdigit(
            ) else undirected_props_kr.get_node1_column_index(
                undirectedpropsfilecolumn)
        elif undirected_props_kr.is_node_file:
            undirected_props_col = int(
                undirectedpropsfilecolumn
            ) if undirectedpropsfilecolumn is not None and undirectedpropsfilecolumn.isdigit(
            ) else undirected_props_kr.get_id_column_index(
                undirectedpropsfilecolumn)
        elif undirectedpropsfilecolumn is not None:
            undirected_props_col = int(
                undirectedpropsfilecolumn
            ) if undirectedpropsfilecolumn is not None and undirectedpropsfilecolumn.isdigit(
            ) else undirected_props_kr.column_name_map.get(
                undirectedpropsfilecolumn, -1)
        else:
            undirected_props_kr.close()
            raise KGTKException(
                "The undirected props file is neither an edge nor a node file and the root column name was not supplied."
            )

        if undirected_props_col < 0:
            undirected_props_kr.close()
            raise KGTKException("Unknown undirected properties column %s" %
                                repr(undirectedpropsfilecolumn))

        for row in undirected_props_kr:
            undirected_property_name: str = row[undirected_props_col]
            undirected_property_set.add(undirected_property_name)
        undirected_props_kr.close()
    if len(undirected_props) > 0:
        # Edges where the predicate (label) column contains one of the selected
        # properties will be treated as undirected links.

        und_prop_group: str
        for und_prop_group in undirected_props:
            und_prop: str
            for und_prop in und_prop_group.split(','):
                undirected_property_set.add(und_prop)
    if verbose and len(undirected_property_set) > 0:
        print("undirected property set=%s" %
              " ".join(sorted(list(undirected_property_set))),
              file=error_file,
              flush=True)

    inverted_property_set: typing.Set[str] = set()
    if inverted_props_file is not None:
        if verbose:
            print("Reading the inverted properties file %s" %
                  repr(inverted_props_file),
                  file=error_file,
                  flush=True)
        try:
            inverted_props_kr: KgtkReader = KgtkReader.open(
                Path(inverted_props_file),
                error_file=error_file,
                who="inverted_props",
                options=inverted_props_reader_options,
                value_options=value_options,
                verbose=verbose,
                very_verbose=very_verbose,
            )
        except SystemExit:
            raise KGTKException("Exiting.")

        inverted_props_col: int
        if inverted_props_kr.is_edge_file:
            inverted_props_col = int(
                invertedpropsfilecolumn
            ) if invertedpropsfilecolumn is not None and invertedpropsfilecolumn.isdigit(
            ) else inverted_props_kr.get_node1_column_index(
                invertedpropsfilecolumn)
        elif inverted_props_kr.is_node_file:
            inverted_props_col = int(
                invertedpropsfilecolumn
            ) if invertedpropsfilecolumn is not None and invertedpropsfilecolumn.isdigit(
            ) else inverted_props_kr.get_id_column_index(
                invertedpropsfilecolumn)
        elif invertedpropsfilecolumn is not None:
            inverted_props_col = int(
                invertedpropsfilecolumn
            ) if invertedpropsfilecolumn is not None and invertedpropsfilecolumn.isdigit(
            ) else inverted_props_kr.column_name_map.get(
                invertedpropsfilecolumn, -1)
        else:
            inverted_props_kr.close()
            raise KGTKException(
                "The inverted props file is neither an edge nor a node file and the root column name was not supplied."
            )

        if inverted_props_col < 0:
            inverted_props_kr.close()
            raise KGTKException("Unknown inverted properties column %s" %
                                repr(invertedpropsfilecolumn))

        for row in inverted_props_kr:
            inverted_property_name: str = row[inverted_props_col]
            inverted_property_set.add(inverted_property_name)
        inverted_props_kr.close()

    if len(inverted_props) > 0:
        # Edges where the predicate (label) column contains one of the selected
        # properties will have the source and target columns swapped.

        inv_prop_group: str
        for inv_prop_group in inverted_props:
            inv_prop: str
            for inv_prop in inv_prop_group.split(','):
                inverted_property_set.add(inv_prop)
    if verbose and len(inverted_property_set):
        print("inverted property set=%s" %
              " ".join(sorted(list(inverted_property_set))),
              file=error_file,
              flush=True)

    try:
        kr: KgtkReader = KgtkReader.open(
            input_kgtk_file,
            error_file=error_file,
            who="input",
            options=input_reader_options,
            value_options=value_options,
            verbose=verbose,
            very_verbose=very_verbose,
        )
    except SystemExit:
        raise KGTKException("Exiting.")

    sub: int = kr.get_node1_column_index(subject_column_name)
    if sub < 0:
        print("Unknown subject column %s" % repr(subject_column_name),
              file=error_file,
              flush=True)

    pred: int = kr.get_label_column_index(predicate_column_name)
    if pred < 0:
        print("Unknown predicate column %s" % repr(predicate_column_name),
              file=error_file,
              flush=True)

    obj: int = kr.get_node2_column_index(object_column_name)
    if obj < 0:
        print("Unknown object column %s" % repr(object_column_name),
              file=error_file,
              flush=True)

    if sub < 0 or pred < 0 or obj < 0:
        kr.close()
        raise KGTKException("Exiting due to unknown column.")

    if verbose:
        print("special columns: sub=%d pred=%d obj=%d" % (sub, pred, obj),
              file=error_file,
              flush=True)

    # G = load_graph_from_csv(filename,not(undirected),skip_first=not(header_bool),hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj))
    G = load_graph_from_kgtk(kr,
                             directed=not undirected,
                             inverted=inverted,
                             ecols=(sub, obj),
                             pcol=pred,
                             pset=property_set,
                             upset=undirected_property_set,
                             ipset=inverted_property_set,
                             verbose=verbose,
                             out=error_file)

    name = G.vp[
        "name"]  # Get the vertex name property map (vertex to ndoe1 (subject) name)

    if show_properties:
        print("Graph name=%s" % repr(name), file=error_file, flush=True)
        print("Graph properties:", file=error_file, flush=True)
        key: typing.Any
        for key in G.properties:
            print("    %s: %s" % (repr(key), repr(G.properties[key])),
                  file=error_file,
                  flush=True)

    index_list = []
    for v in G.vertices():
        if name[v] in root_set:
            index_list.append(v)
    if len(index_list) == 0:
        print(
            "Warning: No root nodes found in the graph, the output file will be empty.",
            file=error_file,
            flush=True)
    elif verbose:
        print("%d root nodes found in the graph." % len(index_list),
              file=error_file,
              flush=True)

    output_header: typing.List[str] = ['node1', 'label', 'node2']

    try:
        kw: KgtkWriter = KgtkWriter.open(output_header,
                                         output_kgtk_file,
                                         mode=KgtkWriter.Mode.EDGE,
                                         require_all_columns=True,
                                         prohibit_extra_columns=True,
                                         fill_missing_columns=False,
                                         verbose=verbose,
                                         very_verbose=very_verbose)
    except SystemExit:
        raise KGTKException("Exiting.")

    for index in index_list:
        if selflink_bool:
            kw.writerow([name[index], label, name[index]])

        if breadth_first:
            if depth_limit is None:
                for e in bfs_iterator(G, G.vertex(index)):
                    kw.writerow([name[index], label, name[e.target()]])

            else:

                class DepthExceeded(Exception):
                    pass

                class DepthLimitedVisitor(BFSVisitor):
                    def __init__(self, name, pred, dist):
                        self.name = name
                        self.pred = pred
                        self.dist = dist

                    def tree_edge(self, e):
                        self.pred[e.target()] = int(e.source())
                        newdist = self.dist[e.source()] + 1
                        if depth_limit is not None and newdist > depth_limit:
                            raise DepthExceeded
                        self.dist[e.target()] = newdist
                        kw.writerow([name[index], label, name[e.target()]])

                dist = G.new_vertex_property("int")
                pred = G.new_vertex_property("int64_t")
                try:
                    bfs_search(G, G.vertex(index),
                               DepthLimitedVisitor(name, pred, dist))
                except DepthExceeded:
                    pass
        else:
            for e in dfs_iterator(G, G.vertex(index)):
                kw.writerow([name[index], label, name[e.target()]])

    kw.close()
    kr.close()
def extract_steiner_tree(sp_tree, terminals, return_nodes=True):
    """Extract the part of a spanning tree that connects the given terminals.

    Args:
    ------------

    sp_tree: spanning tree (graph object accepted by `bfs_search`)
    terminals: collection of integer node ids; a non-list input is
        de-duplicated through a set. Must not be empty.
    return_nodes: bool, return set<int> if True, GraphView otherwise

    Return:
    -----------
    set<int> | GraphView: the nodes of the steiner tree, or a filtered view
    of `sp_tree` containing only the steiner tree's nodes and edges

    Algorithm:

    1. BFS over `sp_tree` from the first terminal `s`, recording for every
       reached node its predecessor and the tree edge leading to it.
    2. For each terminal, walk the predecessor chain back towards `s`,
       collecting nodes and edges. The walk stops early as soon as it hits
       a node that an earlier walk already collected.

    running time: O(E)
    """
    # Work on a private copy: the collection loop below consumes the list.
    remaining = copy(terminals)
    if not isinstance(remaining, list):
        remaining = list(set(remaining))

    assert len(remaining) > 0

    class PredecessorRecorder(BFSVisitor):
        """Remember, for each BFS tree edge, where its target came from."""
        def __init__(self, pred):
            self.pred = pred

        def tree_edge(self, e):
            # Store the edge next to the source node so the edge does not
            # have to be looked up again later (expensive in graph_tool).
            self.pred[int(e.target())] = (int(e.source()), e)

    # node -> (predecessor, edge); (-1, None) means "no predecessor recorded".
    recorder = PredecessorRecorder(
        dict.fromkeys(extract_nodes(sp_tree), (-1, None)))

    tree_edges = set()

    # node -> True once the node has been placed on the steiner tree
    on_tree = dict.fromkeys(extract_nodes(sp_tree), False)

    tree_nodes = set()
    root = remaining[0]
    bfs_search(sp_tree, source=root, visitor=recorder)

    while remaining:
        node = remaining.pop()
        tree_nodes.add(node)
        if on_tree[node]:
            # Already collected as an inner node of an earlier walk.
            continue

        on_tree[node] = True

        # Climb towards the BFS source. The test must be `>= 0` rather than
        # truthiness because 0 is a valid node id.
        parent, edge = recorder.pred[node]
        while parent >= 0:
            tree_nodes.add(parent)
            tree_edges.add(edge)

            if on_tree[parent]:
                break

            on_tree[parent] = True
            parent, edge = recorder.pred[parent]

    if return_nodes:
        return tree_nodes

    # Build boolean vertex/edge masks and expose the tree as a filtered view.
    vfilt = sp_tree.new_vertex_property('bool')
    vfilt.a = False
    for v, collected in on_tree.items():
        if collected:
            vfilt.a[v] = True

    efilt = sp_tree.new_edge_property('bool')
    efilt.a = False

    for tree_edge in tree_edges:
        efilt[tree_edge] = True

    return GraphView(sp_tree, vfilt=vfilt, efilt=efilt)
def reverse_bfs(topdown_tree, verbose=False):
    """Return the nodes of a top-down tree in leaf-to-root BFS order.

    The tree's edges are expected to point away from the root (top-down).
    The traversal starts from the leaves reported by
    `get_leaves(topdown_tree, deg='out')` and repeatedly moves to each node's
    parent, where "parent" is the predecessor recorded by a BFS from
    `get_root(topdown_tree, tree_type='topdown')` over an undirected view of
    the tree.

    Parameters
    ----------
    topdown_tree : the tree to traverse
    verbose : bool, print the leaves, the root, the predecessor map and each
        traversal step to stdout

    Returns
    -------
    list
        The leaves first, followed by every other reached node in the order
        in which it was first discovered as some visited node's parent.

    Raises
    ------
    AssertionError
        If no leaves are found.
    """
    # Local import so the block stays self-contained.
    from collections import deque

    leaves = get_leaves(topdown_tree, deg='out')

    if verbose:
        print('leaves', leaves)
    if not isinstance(leaves, list):
        leaves = list(set(leaves))

    assert len(leaves) > 0

    # child -> parent map; -1 means "no parent recorded" (e.g. the root)
    pred = dict.fromkeys(extract_nodes(topdown_tree), -1)

    class Visitor(BFSVisitor):
        """record the predecessor"""
        def __init__(self, pred):
            self.pred = pred

        def tree_edge(self, e):
            self.pred[int(e.target())] = int(e.source())

    vis = Visitor(pred)

    visited = dict.fromkeys(extract_nodes(topdown_tree), False)

    # The leaves are the first nodes of the result and count as visited.
    nodes_visited = list(leaves)
    for v in nodes_visited:
        visited[v] = True

    s = get_root(topdown_tree, tree_type='topdown')

    if verbose:
        print('root', s)

    # Search the undirected view from the root to fill the child -> parent map.
    bfs_search(GraphView(topdown_tree, directed=False), source=s, visitor=vis)

    if verbose:
        print('vis.pred', vis.pred)

    # A deque gives O(1) pops from the front; list.pop(0) is O(n) per call.
    # Copying into the deque also leaves the list returned by get_leaves
    # untouched.
    queue = deque(leaves)
    while queue:
        x = queue.popleft()
        if verbose:
            print('visiting ', x)

        y = vis.pred[x]
        if verbose:
            print('visiting y', y)
        # `>= 0` rather than truthiness: 0 is a valid node id.
        if y >= 0 and not visited[y]:
            if verbose:
                print('not visited')
            nodes_visited.append(y)
            visited[y] = True
            queue.append(y)
    return nodes_visited
Exemple #14
0
def bfs_one_source(g, source):
    """Run a BFS from `source` and return the resulting per-vertex values.

    A fresh "int" vertex property is created on `g` and handed to
    `SimpleVisitor`, which is driven by `bfs_search` starting at
    `g.vertex(source)`.

    Returns a dict mapping each position in the property's array (`.a`) to
    the value stored there -- presumably vertex index -> BFS distance from
    `source`; SimpleVisitor is defined elsewhere, confirm what it records.
    """
    distances = g.new_vertex_property("int")
    start_vertex = g.vertex(source)
    visitor = SimpleVisitor(distances)
    bfs_search(g, start_vertex, visitor)
    return dict(enumerate(distances.a))