def compress_graph(g: LightMultiGraph, subtree: Set[int], boundary_edges: Any, permanent: bool) -> Union[None, float]:
    """
    Compress the nodes in subtree into a single new node.
    :param g: the graph
    :param subtree: the set of nodes that is compressed
    :param boundary_edges: edges connecting the subtree to the rest of the graph; computed here if None
    :param permanent: if False, undo the compression after computing the new DL and return that DL
    :return: the graph DL of the compressed graph if permanent is False, else None
    """
    assert len(subtree) > 0, f'Empty subtree g:{g.order(), g.size()}, bound: {boundary_edges}'
    before = (g.order(), g.size())

    if not isinstance(subtree, set):
        subtree = set(subtree)

    if boundary_edges is None:
        # compute the boundary edges
        boundary_edges = find_boundary_edges(g, subtree)

    removed_edges = set()
    removed_nodes = set()

    # step 1: remove the nodes in subtree, keeping track of the removed nodes and edges if the change must be undone
    if not permanent:
        removed_edges = list(g.subgraph(subtree).edges(data=True))
        removed_nodes = list(g.subgraph(subtree).nodes(data=True))
    g.remove_nodes_from(subtree)

    # step 2: replace the subtree with new_node
    new_node = min(subtree)
    g.add_node(new_node, label=len(boundary_edges))

    # step 3: rewire the boundary edges to new_node
    for u, v in boundary_edges:
        if u in subtree:
            u = new_node
        if v in subtree:
            v = new_node
        g.add_edge(u, v)

    if not permanent:
        # not permanent: return the DL of the compressed graph, then undo the changes
        compressed_graph_dl = graph_dl(g)
        # print(f'In compress_graph, dl after change: {compressed_graph_dl:_g}')

        g.remove_node(new_node)  # removing new_node also removes the rewired boundary edges
        g.add_nodes_from(removed_nodes)  # add back the subtree nodes
        for e in itertools.chain(removed_edges, boundary_edges):
            if len(e) == 3:
                u, v, d = e
            else:
                u, v = e
                d = {'weight': 1}
            g.add_edge(u, v, weight=d['weight'])

        after = (g.order(), g.size())
        assert before == after, 'Decompression did not work'
        return compressed_graph_dl
    else:
        return None

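# Illustrative sketch (not part of the original module): how the permanent flag of
# compress_graph is used. With permanent=False the call returns the DL of the
# compressed graph and restores g; with permanent=True it modifies g in place and
# returns None. `candidate` is a hypothetical subtree; the real extractors combine
# this trial DL with the rule's DL before deciding whether to commit.
def _example_trial_then_commit(g: LightMultiGraph, candidate: Set[int]) -> None:
    trial_dl = compress_graph(g=g, subtree=candidate, boundary_edges=None, permanent=False)  # g is left unchanged
    if trial_dl < graph_dl(g):  # naive check: compression alone lowers the description length
        compress_graph(g=g, subtree=candidate, boundary_edges=None, permanent=True)  # g is modified in place
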
def __init__(self, g: LightMultiGraph, type: str, root: TreeNode, grammar: VRG, mu: int):
    super().__init__(g=g, type=type, root=root, grammar=grammar, mu=mu)
    self.graph_dl = graph_dl(self.g)  # during extraction, compute it once for all the rules because the graph doesn't change
    self.update_subtree_scores(start_tnode=self.root)

def get_grammar(name: str, clustering: str, grammar_type: str, mu: int,
                path_input: str, path_node_attrs: str, path_edge_attrs: str, path_timestamps: str) -> Tuple[VRG, int]:
    """
    Extract a grammar for the named graph and report the stats
    :return: the extracted grammar and the order of the original graph
    """
    original_graph = get_graph(name, path_input, path_node_attrs, path_edge_attrs, path_timestamps)
    outdir = 'dumps'
    make_dirs(outdir, name)  # make the directories if needed

    grammar_types = ('mu_random', 'mu_level', 'mu_dl', 'mu_level_dl', 'local_dl', 'global_dl')
    assert grammar_type in grammar_types, f'Invalid grammar type: {grammar_type}'

    g_copy = original_graph.copy()
    list_of_list_clusters = get_clustering(g=g_copy, outdir=f'{outdir}/trees/{name}', clustering=clustering)
    root = create_tree(list_of_list_clusters)

    g_dl = graph_dl(original_graph)
    grammar = VRG(clustering=clustering, type=grammar_type, name=name, mu=mu)
    g = original_graph.copy()

    start_time = time()
    if 'mu' in grammar_type:
        extractor = MuExtractor(g=g, type=grammar.type, grammar=grammar, mu=mu, root=root)
    elif 'local' in grammar_type:
        extractor = LocalExtractor(g=g, type=grammar_type, grammar=grammar, mu=mu, root=root)
    else:
        assert grammar_type == 'global_dl', f'improper grammar type {grammar_type}'
        extractor = GlobalExtractor(g=g, type=grammar.type, grammar=grammar, mu=mu, root=root)

    extractor.generate_grammar()
    time_taken = round(time() - start_time, 4)
    grammar = extractor.grammar

    tqdm.write(f'name: {name}, original: {g_dl}, grammar: {grammar.cost}, time: {time_taken}')
    return grammar, original_graph.order()

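# Illustrative call (graph name, clustering label, paths, and mu are hypothetical
# placeholders, not taken from the original code): extract a grammar and report how
# large it is relative to the original graph.
def _example_extract_grammar() -> None:
    grammar, n = get_grammar(name='karate', clustering='leiden', grammar_type='mu_level_dl', mu=4,
                             path_input='data/karate.g', path_node_attrs='', path_edge_attrs='',
                             path_timestamps='')
    print(f'karate: {n} nodes, {len(grammar)} rules, grammar DL {round(grammar.cost, 3)}')
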
def __init__(self, g: LightMultiGraph, type: str, root: TreeNode, grammar: VRG, mu: int):
    super().__init__(g=g, type=type, root=root, grammar=grammar, mu=mu)
    self.final_grammar = grammar.copy()
    self.graph_dl: float = graph_dl(self.g)  # during extraction, compute it once for all the rules because the graph doesn't change
    self.tnode_to_rule: Dict[TreeNode, PartRule] = {}  # maps each tree node to a rule
    self.rule_id_to_record: Dict[int, Record] = {}  # maps each rule (via rule id) to a record object
    self.update_subtree_scores(start_tnode=self.root)
    self.update_all_record_scores()  # this updates the scores of the records
    logging.debug('Grammar initialized')

def update_all_record_scores(self) -> None:
    """
    Updates the scores of all the record objects
    :return:
    """
    g_dl = graph_dl(self.g)  # initial graph dl
    for record in self.rule_id_to_record.values():
        rule = self.grammar[record.rule_id]
        if rule.graph.order() > self.mu:  # the rule is larger than mu
            record.score = float('inf')
            continue
        assert rule.frequency == record.frequency, 'the frequencies of the rule and record should match'
        self.set_record_score(record, g_dl=g_dl)
    return

def extract_rule(self) -> PartRule:
    """
    Step 0: compute graph dl
    Step 1: get best tnode
    Step 2: create rule, add to grammar
    Step 3: compress graph, update tree
    :return:
    """
    self.graph_dl = graph_dl(self.g)

    best_tnode, score = self.get_best_tnode_and_score()
    logging.debug(f'best tnode: {best_tnode}, score: {round(score, 3)}')
    subtree = best_tnode.leaves & set(self.g.nodes())

    rule, boundary_edges = create_rule(subtree=subtree, g=self.g, mode='part')
    compress_graph(g=self.g, subtree=subtree, boundary_edges=boundary_edges, permanent=True)
    self.update_tree(tnode=best_tnode)

    return rule

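# Minimal sketch of a driving loop (assumption: generate_grammar repeatedly calls
# extract_rule until the graph is compressed down to a single node; the actual
# stopping criterion in the extractors may differ).
def _example_generate_grammar(extractor: 'MuExtractor') -> None:
    while extractor.g.order() > 1:
        rule = extractor.extract_rule()
        logging.debug(f'extracted rule of order {rule.graph.order()}')
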
def dump_grammar(name: str, clustering: str, grammar_type: str, mu: int) -> None:
    """
    Dump the stats
    :return:
    """
    original_graph = get_graph(name)
    outdir = 'dumps'
    make_dirs(outdir, name)  # make the directories if needed

    grammar_types = ('mu_random', 'mu_level', 'mu_dl', 'mu_level_dl', 'local_dl', 'global_dl')
    assert grammar_type in grammar_types, f'Invalid grammar type: {grammar_type}'

    g_copy = original_graph.copy()
    list_of_list_clusters = get_clustering(g=g_copy, outdir=f'{outdir}/trees/{name}', clustering=clustering)

    g_dl = graph_dl(original_graph)
    grammar = VRG(clustering=clustering, type=grammar_type, name=name, mu=mu)
    g = original_graph.copy()
    list_of_list_clusters_copy = list_of_list_clusters[:]
    root = create_tree(list_of_list_clusters_copy)

    start_time = time()
    if 'mu' in grammar_type:
        extractor = MuExtractor(g=g, type=grammar.type, grammar=grammar, mu=mu, root=root)
    elif 'local' in grammar_type:
        extractor = LocalExtractor(g=g, type=grammar_type, grammar=grammar, mu=mu, root=root)
    else:
        assert grammar_type == 'global_dl', f'improper grammar type {grammar_type}'
        extractor = GlobalExtractor(g=g, type=grammar.type, grammar=grammar, mu=mu, root=root)

    extractor.generate_grammar()
    time_taken = round(time() - start_time, 4)
    grammar = extractor.grammar

    row = {'name': name, 'n': original_graph.order(), 'm': original_graph.size(), 'g_dl': round(g_dl, 3),
           'type': grammar_type, 'mu': mu, 'clustering': clustering, '#rules': len(grammar),
           'grammar_dl': round(grammar.cost, 3), 'time': time_taken,
           'compression': round(grammar.cost / g_dl, 3)}

    # tqdm.write(f"name: {name}, n: {row['n']}, m: {row['m']}, mu: {row['mu']}, graph_dl: {g_dl}, grammar_dl: {grammar.cost},"
    #            f"compression: {row['compression']}, time: {time_taken}s")
    tqdm.write(f'name: {name}, original: {g_dl}, grammar: {grammar.cost}, time: {time_taken}')
    return

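# Illustrative sweep (graph name, clustering label, and mu range are hypothetical
# placeholders): dump stats for every supported grammar type over a range of mu values.
def _example_dump_all(name: str = 'karate', clustering: str = 'leiden') -> None:
    for grammar_type in ('mu_random', 'mu_level', 'mu_dl', 'mu_level_dl', 'local_dl', 'global_dl'):
        for mu in range(3, 8):
            dump_grammar(name=name, clustering=clustering, grammar_type=grammar_type, mu=mu)
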