예제 #1
0
def compress_graph(g: LightMultiGraph, subtree: Set[int], boundary_edges: Any,
                   permanent: bool) -> Union[None, float]:
    """
    Compress ``subtree`` into a single node of ``g``, in place.

    :param g: the graph (mutated in place)
    :param subtree: the set of nodes that's compressed into one new node
    :param boundary_edges: edges crossing the subtree boundary; computed here when None
    :param permanent: if disabled, undo the compression after computing the new dl -> returns the float
    :return: the compressed graph's description length when ``permanent`` is
        False, otherwise None
    """
    assert len(
        subtree
    ) > 0, f'Empty subtree g:{g.order(), g.size()}, bound: {boundary_edges}'
    before = (g.order(), g.size())  # (order, size) snapshot used to verify a clean undo

    if not isinstance(subtree, set):
        subtree = set(subtree)

    if boundary_edges is None:
        # compute the boundary edges
        boundary_edges = find_boundary_edges(g, subtree)

    removed_edges = set()
    removed_nodes = set()
    # step 1: remove the nodes from subtree, keep track of the removed edges
    # (snapshots are only needed when the change must be undone below)
    if not permanent:
        removed_edges = list(g.subgraph(subtree).edges(data=True))
        removed_nodes = list(g.subgraph(subtree).nodes(data=True))
    g.remove_nodes_from(subtree)
    new_node = min(subtree)  # reuse the smallest removed node id as the replacement node

    # step 2: replace subtree with new_node; its label is the boundary degree
    g.add_node(new_node, label=len(boundary_edges))

    # step 3: rewire new_node — redirect every boundary-edge endpoint that was
    # inside the subtree to the new node
    for u, v in boundary_edges:
        if u in subtree:
            u = new_node
        if v in subtree:
            v = new_node
        g.add_edge(u, v)

    if not permanent:  # if this flag is set, then return the graph dl of the compressed graph and undo the changes
        compressed_graph_dl = graph_dl(g)
        # print(f'In compress_graph, dl after change: {compressed_graph_dl:_g}')
        g.remove_node(new_node)  # and the boundary edges
        g.add_nodes_from(removed_nodes)  # add the subtree

        # restore the internal subtree edges and the boundary edges;
        # 3-tuples carry an edge-data dict, 2-tuples fall back to weight 1.
        # NOTE(review): a 2-tuple boundary edge is always restored with weight 1 —
        # any original multi-edge weight would be lost here; confirm with callers
        for e in itertools.chain(removed_edges, boundary_edges):
            if len(e) == 3:
                u, v, d = e
            else:
                u, v = e
                d = {'weight': 1}
            g.add_edge(u, v, weight=d['weight'])

        after = (g.order(), g.size())
        assert before == after, 'Decompression did not work'
        return compressed_graph_dl
    else:
        return None
예제 #2
0
 def __init__(self, g: LightMultiGraph, type: str, root: TreeNode,
              grammar: VRG, mu: int):
     """Initialize the extractor and pre-compute per-extraction state."""
     super().__init__(g=g, type=type, root=root, grammar=grammar, mu=mu)
     # the graph is never mutated during extraction, so its description
     # length is computed once here and reused for every rule
     self.graph_dl = graph_dl(self.g)
     self.update_subtree_scores(start_tnode=self.root)
예제 #3
0
def get_grammar(name: str, clustering: str, grammar_type: str, mu: int,
                path_input: str, path_node_attrs: str, path_edge_attrs: str, path_timestamps: str) -> Tuple[VRG, int]:
    """
    Extract a VRG grammar from the named graph.

    :param name: dataset name
    :param clustering: clustering method used to build the dendrogram
    :param grammar_type: one of the supported grammar types (see ``grammar_types``)
    :param mu: size parameter bounding the rules
    :param path_input: path to the input graph
    :param path_node_attrs: path to the node attributes
    :param path_edge_attrs: path to the edge attributes
    :param path_timestamps: path to the timestamps
    :return: the extracted grammar and the order of the original graph
    """
    original_graph = get_graph(name, path_input, path_node_attrs,
                               path_edge_attrs, path_timestamps)
    outdir = 'dumps'
    make_dirs(outdir, name)  # make the directories if needed

    grammar_types = ('mu_random', 'mu_level', 'mu_dl', 'mu_level_dl',
                     'local_dl', 'global_dl')
    assert grammar_type in grammar_types, f'Invalid grammar type: {grammar_type}'

    g_copy = original_graph.copy()  # clustering works on its own copy

    list_of_list_clusters = get_clustering(g=g_copy,
                                           outdir=f'{outdir}/trees/{name}',
                                           clustering=clustering)
    root = create_tree(list_of_list_clusters)

    g_dl = graph_dl(original_graph)
    grammar = VRG(clustering=clustering, type=grammar_type, name=name, mu=mu)

    g = original_graph.copy()  # extraction compresses this copy in place

    start_time = time()
    # dispatch on the grammar-type family; all branches pass grammar.type
    # (== grammar_type, set above) for consistency
    if 'mu' in grammar_type:
        extractor = MuExtractor(g=g,
                                type=grammar.type,
                                grammar=grammar,
                                mu=mu,
                                root=root)

    elif 'local' in grammar_type:
        extractor = LocalExtractor(g=g,
                                   type=grammar.type,
                                   grammar=grammar,
                                   mu=mu,
                                   root=root)

    else:
        assert grammar_type == 'global_dl', f'improper grammar type {grammar_type}'
        extractor = GlobalExtractor(g=g,
                                    type=grammar.type,
                                    grammar=grammar,
                                    mu=mu,
                                    root=root)

    extractor.generate_grammar()
    time_taken = round(time() - start_time, 4)

    grammar = extractor.grammar

    tqdm.write(
        f"name: {name}, original: {g_dl}, grammar: {grammar.cost}, time: {time_taken}"
    )
    return grammar, original_graph.order()
예제 #4
0
    def __init__(self, g: LightMultiGraph, type: str, root: TreeNode, grammar: VRG, mu: int):
        """Set up extraction state: grammar copy, cached graph DL, and score maps."""
        super().__init__(g=g, type=type, root=root, grammar=grammar, mu=mu)
        self.final_grammar = grammar.copy()
        # the graph is not mutated during extraction, so its description
        # length is computed a single time and reused for every rule
        self.graph_dl: float = graph_dl(self.g)
        # per-tree-node extracted rule
        self.tnode_to_rule: Dict[TreeNode, PartRule] = {}
        # per-rule-id bookkeeping record
        self.rule_id_to_record: Dict[int, Record] = {}

        self.update_subtree_scores(start_tnode=self.root)
        self.update_all_record_scores()  # refresh the scores of all records
        logging.debug('Grammar initialized')
예제 #5
0
    def update_all_record_scores(self) -> None:
        """
        Recompute the score of every record object.
        :return:
        """
        current_dl = graph_dl(self.g)  # description length of the current graph

        for rec in self.rule_id_to_record.values():
            rule = self.grammar[rec.rule_id]

            if rule.graph.order() > self.mu:
                # oversized rules can never be extracted — mark as infinitely bad
                rec.score = float('inf')
            else:
                assert rule.frequency == rec.frequency, 'the frequencies of the rule and record should match'
                self.set_record_score(rec, g_dl=current_dl)
예제 #6
0
    def extract_rule(self) -> PartRule:
        """
        Extract the single best rule from the current graph.

        Step 0: compute graph dl
        Step 1: get best tnode
        Step 2: create rule, add to grammar
        Step 3: compress graph, update tree
        :return: the extracted rule
        """
        # step 0: refresh the cached description length of the shrinking graph
        self.graph_dl = graph_dl(self.g)

        # step 1: pick the tree node whose subtree scores best
        best_tnode, score = self.get_best_tnode_and_score()
        logging.debug(f'best tnode: {best_tnode}, score: {round(score, 3)}')
        # keep only leaves that still exist in the (already compressed) graph
        subtree = best_tnode.leaves & set(self.g.nodes())

        # step 2: build the rule from the chosen subtree
        rule, boundary_edges = create_rule(subtree=subtree, g=self.g, mode='part')

        # step 3: permanently compress the subtree and update the dendrogram
        compress_graph(g=self.g,
                       subtree=subtree,
                       boundary_edges=boundary_edges,
                       permanent=True)
        self.update_tree(tnode=best_tnode)

        return rule
예제 #7
0
def dump_grammar(name: str, clustering: str, grammar_type: str,
                 mu: int) -> None:
    """
    Extract a grammar for the named graph and report its stats.

    :param name: dataset name
    :param clustering: clustering method used to build the dendrogram
    :param grammar_type: one of the supported grammar types (see ``grammar_types``)
    :param mu: size parameter bounding the rules
    :return:
    """
    original_graph = get_graph(name)
    outdir = 'dumps'
    make_dirs(outdir, name)  # make the directories if needed

    grammar_types = ('mu_random', 'mu_level', 'mu_dl', 'mu_level_dl',
                     'local_dl', 'global_dl')
    assert grammar_type in grammar_types, f'Invalid grammar type: {grammar_type}'

    g_copy = original_graph.copy()  # clustering works on its own copy

    list_of_list_clusters = get_clustering(g=g_copy,
                                           outdir=f'{outdir}/trees/{name}',
                                           clustering=clustering)

    g_dl = graph_dl(original_graph)

    grammar = VRG(clustering=clustering, type=grammar_type, name=name, mu=mu)

    g = original_graph.copy()  # extraction compresses this copy in place
    # create_tree consumes its input, so hand it a (shallow) copy
    list_of_list_clusters_copy = list_of_list_clusters[:]
    root = create_tree(list_of_list_clusters_copy)
    start_time = time()
    # dispatch on the grammar-type family; all branches pass grammar.type
    # (== grammar_type, set above) for consistency
    if 'mu' in grammar_type:
        extractor = MuExtractor(g=g,
                                type=grammar.type,
                                grammar=grammar,
                                mu=mu,
                                root=root)

    elif 'local' in grammar_type:
        extractor = LocalExtractor(g=g,
                                   type=grammar.type,
                                   grammar=grammar,
                                   mu=mu,
                                   root=root)

    else:
        assert grammar_type == 'global_dl', f'improper grammar type {grammar_type}'
        extractor = GlobalExtractor(g=g,
                                    type=grammar.type,
                                    grammar=grammar,
                                    mu=mu,
                                    root=root)

    extractor.generate_grammar()
    time_taken = round(time() - start_time, 4)

    grammar = extractor.grammar

    # NOTE(review): this row is assembled but never written anywhere — the
    # write below is commented out. Either persist it or drop it; kept as-is
    # to preserve the current output behavior.
    row = {
        'name': name,
        'n': original_graph.order(),
        'm': original_graph.size(),
        'g_dl': round(g_dl, 3),
        'type': grammar_type,
        'mu': mu,
        'clustering': clustering,
        '#rules': len(grammar),
        'grammar_dl': round(grammar.cost, 3),
        'time': time_taken,
        'compression': round(grammar.cost / g_dl, 3)
    }

    # tqdm.write(f"name: {name}, n: {row['n']}, m: {row['m']}, mu: {row['mu']}, graph_dl: {g_dl}, grammar_dl: {grammar.cost},"
    #            f"compression: {row['compression']}, time: {time_taken}s")
    tqdm.write(
        f"name: {name}, original: {g_dl}, grammar: {grammar.cost}, time: {time_taken}"
    )
    return