Ejemplo n.º 1
0
    def get_token_tree(self, token, depth_limit, cooccurrence_threshold):
        """Return the tree rooted at `token` in `networkx.tree_data` form.

        The graph matching the query is pulled from the repository in full
        (which can be memory intensive) and rebuilt in memory; it is then
        narrowed to a depth-first tree starting at the node whose 'content'
        attribute equals `token`.

        Returns None, after calling `warn`, when the pulled graph has no nodes.
        """
        query = GetTokenTreeQuery(token, depth_limit, cooccurrence_threshold)
        pulled_graph = rebuild_graph(self.repository.pull_graph(query))

        print(pulled_graph.number_of_nodes())
        print(pulled_graph.number_of_edges())

        # Nothing matched the query: there is no tree to build.
        if pulled_graph.number_of_nodes() == 0:
            warn("empty result tree")
            return None

        # The occurrence properties are on the in-memory graph by now, but
        # they would not show up in the tree export yet, so the strength
        # attributes are migrated via `transform` first.
        migrated = transform(pulled_graph)

        root = get_node_by_attribute(migrated, 'content', token)
        migrated.nodes[root]['strength'] = None
        tree = dfs_tree_with_node_attributes(
            migrated, root, depth_limit=depth_limit)

        print("Number of nodes in tree", tree.number_of_nodes())
        print("Number of edges in tree", tree.number_of_edges())

        return networkx.tree_data(tree, root)
Ejemplo n.º 2
0
    def export(self, root=None):
        """Serialise `self.graph` into the JSON-able form named by `self.graph_type`.

        Supported values of `self.graph_type`:

        * 'node_link' -- result of `nx.node_link_data`
        * 'cytoscape' -- result of `nx.cytoscape_data`
        * 'tree'      -- result of `nx.tree_data`, rooted at `root`

        For 'tree', the graph must be directed; otherwise a message is printed
        and 0 is returned.

        Raises:
            ValueError: if `self.graph_type` is none of the supported values.
        """
        # Strings are compared with ==, not `is`: identity of equal strings is
        # an interpreter implementation detail and must not be relied upon.
        if self.graph_type == 'node_link':
            return nx.node_link_data(self.graph)

        if self.graph_type == 'cytoscape':
            return nx.cytoscape_data(self.graph)

        if self.graph_type == 'tree':
            if not nx.is_directed(self.graph):
                print('Graph passed into export data is not directed')
                return 0
            return nx.tree_data(self.graph, root)

        # Previously an unknown type fell through to `return graph_json` and
        # failed with an UnboundLocalError; fail with a clear message instead.
        raise ValueError(
            "unsupported graph_type: %r" % (self.graph_type,))
Ejemplo n.º 3
0
    def export_taxonomy_tree(self, root):
        """
        Return the taxonomy tree headed by `root` in a JSON-able format.

        The output is meant to be interpretable by d3-hierarchy, networkx, and
        the JavaScript TreeModel library.

        `root` is the 'content' string of a Taxon node with in-degree zero,
        i.e. it uniquely names the 'top' Taxon of one taxonomy.  Two
        taxonomies whose top nodes share the same 'content' value are
        considered a database corruption; the behaviour in that case is
        undefined.
        """
        taxonomy_graph = rebuild_graph(
            self.repository.pull_graph(SlurpTaxonomiesQuery()))
        top_node = find_root_by_content(taxonomy_graph, root)

        # depth_limit=None is passed through as-is; presumably this keeps the
        # whole taxonomy below the root -- confirm against the helper.
        return networkx.tree_data(
            dfs_tree_with_node_attributes(
                taxonomy_graph, top_node, depth_limit=None),
            top_node)
Ejemplo n.º 4
0
    def search_with_taxons(self, token, taxon_uris, depth_limit,
                           cooccurrence_threshold):
        """Return a one-level tree of the query results hanging off `token`.

        The graph returned for the taxon-filtered query is rebuilt in memory,
        every non-root node is attached directly to the node whose 'content'
        is `token`, and the result is exported with `networkx.tree_data`.
        Non-root nodes get 'strength' 0; the root gets 'strength' None.
        """
        query = GetTokenRootWithTaxonFilterQuery(
            token, taxon_uris, depth_limit, cooccurrence_threshold)
        graph = rebuild_graph(self.repository.pull_graph(query))

        # An empty graph means nothing was matched, so the root node has to be
        # put back by hand.
        if is_null_graph(graph):
            graph.add_node(0, content=token)

        # Reconnect the graph: hang every other node directly under the root.
        root = get_node_by_attribute(graph, 'content', token)
        for node in graph.nodes:
            if node == root:
                continue
            graph.nodes[node]['strength'] = 0
            graph.add_edge(root, node)

        graph.nodes[root]['strength'] = None

        return networkx.tree_data(graph, root)
import json
import networkx as nx

# Per-node 'weight' attribute, keyed by node id (0..7).
weights = {0: 4, 1: 7, 2: 8, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2}

# Parent -> child edges; together they form a tree rooted at node 0.
edges = [
    (0, 1), (0, 2),
    (2, 4),
    (4, 6), (4, 7),
    (1, 3), (1, 5),
]

# Create each node together with its weight, then wire up the edges.
G = nx.DiGraph()
G.add_nodes_from(
    (node, {'weight': weight}) for node, weight in weights.items())
G.add_edges_from(edges)

# Despite its name, this holds the nested output of nx.tree_data.
node_link_data = nx.tree_data(G, root=0)

# Save as JSON.
with open('tree.json', 'w') as f:
    f.write(json.dumps(node_link_data, indent=4, sort_keys=True))
Ejemplo n.º 6
0

if __name__ == "__main__":
    # Load the data and derive both the serialisable structure (`graph`) and
    # the graph object (`G`) from it.
    df = load_data()
    df, child_counter = update_header(df)
    graph, G = create_node_links(df, child_counter)

    # Context managers guarantee the files are flushed and closed, which the
    # bare open() calls used previously did not.
    with open("graph.json", 'w') as file_object:
        json.dump(graph, file_object)

    # Breadth-first tree starting at node 1.
    T = bfs_tree(G, 1)

    # Copy every attribute of each node in G onto the same node in T, so the
    # exported tree carries the original node data.
    for node in T.nodes:
        T.nodes[node].update(G.nodes[node])

    treeJSON = tree_data(T, 1)
    print(treeJSON)

    with open("tree.json", 'w') as file_object:
        json.dump(treeJSON, file_object)
                # Skip to the next parent if it doesn't exist as a concrete row
                # This can happen and it's not an error
                if get_uri(source) not in g:
                    continue

                for j in range(i + 1, len(category_sequence)):
                    possible_target = get_concat_id(category_sequence, j)

                    if get_uri(possible_target) in g:
                        g.add_edge(get_uri(source), get_uri(possible_target))
                        break

        sources = [v for v, indegree in g.in_degree() if indegree == 0]
        print("Possible roots:", sources)

        ARTIFICIAL_ROOT = get_uri('00')

        # artificially reparent to form a rooted tree
        g.add_node(ARTIFICIAL_ROOT, content='Theme')
        for source in sources:
            g.add_edge(ARTIFICIAL_ROOT, source)

        tree = dfs_tree_with_node_attributes(g, ARTIFICIAL_ROOT)
        return tree


if __name__ == '__main__':
    # Script entry point: load the file named by the first command-line
    # argument and print its tree_data export.
    obj = SamuelsLoader()
    loaded_tree = obj.load(sys.argv[1])

    # NOTE(review): the root id passed here is the literal '00' -- confirm it
    # matches the id of the root node in the tree returned by load().
    print("Tree is", networkx.tree_data(loaded_tree, '00'))
Ejemplo n.º 8
0
# Small sample corpus; every sentence starts with the word 'keep'.
sentences = [
    "keep a bar",
    "keep a shop",
    "keep the peace",
    "keep the books",
]

# Both constructor arguments are Mock objects; this script only uses the
# backend's `preprocess` method.
backend = occubrow.backend.OccubrowBackend(Mock(), Mock())

# Directed graph that will receive one edge per consecutive token pair.
g = networkx.DiGraph()

def consecutive_pairs(sequence):
    """Yield each pair of adjacent items of `sequence`, in order.

    For items a, b, c the pairs are (a, b) and (b, c).  Nothing is yielded
    when `sequence` has fewer than two items.  Any iterable is accepted
    (lists, strings, iterators, generators), not only indexable sequences.
    """
    items = iter(sequence)
    # Seed with the first item; if the input is empty the loop body below
    # never runs, so the placeholder default is never yielded.
    previous = next(items, None)
    for current in items:
        yield previous, current
        previous = current

# Link each token to the token that follows it in the same sentence.
# add_edges_from creates any endpoint that is not in the graph yet, so the
# nodes do not need to be added separately.
for sentence in sentences:
    tokens = backend.preprocess(sentence)
    g.add_edges_from(consecutive_pairs(tokens))

# Export the graph as nested tree data rooted at the 'keep' node.
result = networkx.tree_data(g, 'keep')

pprint.pprint(result)
Ejemplo n.º 9
0
 def narrow_graph_to_tree(
     self, graph, chosen_source, depth_limit, coocurrence_threshold
 ):
     """Return `graph` narrowed to a tree rooted at `chosen_source`.

     The tree is built by `dfs_tree_with_node_attributes` with the given
     `depth_limit` and returned in `networkx.tree_data` form.
     `coocurrence_threshold` is accepted but not used here.
     """
     subtree = dfs_tree_with_node_attributes(
         graph, chosen_source, depth_limit=depth_limit
     )
     return networkx.tree_data(subtree, root=chosen_source)