def synthesize_random_trees(stop_probability: float, max_nodes=500):
    """Randomly grow a tree of bracket and ``SEQ`` nodes below a ``Start`` root.

    One bracket type (``()``, ``[]``, ``{}`` or ``<>``) is drawn once per call
    and used for every non-``SEQ`` node of the tree. Each generated node is a
    ``SEQ`` node (properties ``child`` and ``next``) with probability 0.25 and
    the selected bracket node (property ``child``) otherwise.

    :param stop_probability: chance, strictly between 0 and 1, that an open
        slot is closed with an ``Empty`` leaf instead of being expanded.
        Slots directly under ``SEQ`` or ``Start`` are never closed this way.
    :param max_nodes: once more than this many nodes have been generated,
        every remaining open slot is closed with an ``Empty`` leaf.
    :return: the ``Start`` root node of the generated tree.
    """
    assert 0 < stop_probability < 1
    available_nodes = [
        ('()', ('child',)),
        ('[]', ('child',)),
        ('{}', ('child',)),
        ('<>', ('child',)),
    ]
    selected_node_type = available_nodes[random.randint(0, len(available_nodes) - 1)]
    seq_node_type = ('SEQ', ('child', 'next'))

    root = Node('Start', ('child',))
    num_generated = 1
    to_generate = [(root, 'child')]  # stack of (node, property) slots still to fill
    while to_generate:
        next_node, property_to_generate = to_generate.pop()
        # Both values are drawn on every iteration, even if unused, so the
        # sequence of random numbers consumed stays the same.
        p_stop = random.random()
        seq_selection = random.random()

        may_stop_here = next_node.name != 'SEQ' and next_node.name != 'Start'
        if (p_stop < stop_probability and may_stop_here) or num_generated > max_nodes:
            # The children argument must be a sequence of nodes, hence the
            # explicit 1-tuple (the trailing comma matters).
            empty_leaf = Node('Empty', (), parent=next_node)
            next_node.set_children_for_property(property_to_generate, (empty_leaf,))
            continue

        num_generated += 1
        current_node_type = seq_node_type if seq_selection < 0.25 else selected_node_type
        child = Node(current_node_type[0], current_node_type[1], parent=next_node)
        next_node.set_children_for_property(property_to_generate, (child,))
        for child_property in current_node_type[1]:
            to_generate.append((child, child_property))
    return root
def generate_all_trees(current_tree, max_tree_size=7):
    """Yield every complete tree reachable by filling the empty slots of a tree.

    An empty slot is a ``(node, property)`` pair whose property has no
    children. The first empty slot found when iterating the tree is filled,
    on a deep copy, once with each entry of ``all_symbols`` (as a node without
    properties) and once with each entry of ``non_terminals``; the result is
    expanded recursively. A tree with no empty slots is yielded as is.

    :param current_tree: the partially built tree; it is never modified.
    :param max_tree_size: size bound. A tree is abandoned when its node count,
        or its node count plus its number of empty slots, exceeds
        ``max_tree_size + 1``.
    :return: a generator of fully expanded trees.
    """
    size_limit = max_tree_size + 1
    if len(current_tree) > size_limit:
        return

    empty_positions = [(n, p) for n in current_tree
                       for p in n.properties if len(n[p]) == 0]
    # Every empty slot needs at least one more node, so prune early.
    if len(current_tree) + len(empty_positions) > size_limit:
        return
    if not empty_positions:
        yield current_tree
        return

    def expand(name, properties):
        """Fill the first empty slot of a fresh copy and recurse into it."""
        tree_copy = deepcopy(current_tree)
        node, slot = next((n, p) for n in tree_copy
                          for p in n.properties if len(n[p]) == 0)
        child = Node(name, properties, parent=node)
        node.set_children_for_property(slot, (child, ))
        # Pass the limit on explicitly; otherwise the recursion would fall
        # back to the default and ignore the caller's max_tree_size.
        return generate_all_trees(tree_copy, max_tree_size)

    for symbol in all_symbols:
        yield from expand(symbol, ())
    for non_terminal_name, _, properties in non_terminals:
        yield from expand(non_terminal_name, properties)
def parse_ast(ast_data):
    """
    Given an AST convert it to Node representation.

    Each AST entry is a mapping with a ``"Name"`` key and optional
    ``"Symbol"`` and ``"Children"`` keys; ``"Children"`` maps a property name
    to the single child entry stored under that property.

    :param ast_data: the mapping describing the root of the AST.
    :return: the root Node of the converted tree.
    """
    def properties_of(data):
        # The property names of a node are the keys of its "Children" mapping.
        return tuple(data["Children"]) if "Children" in data else ()

    root_node = Node(ast_data["Name"],
                     properties=properties_of(ast_data),
                     symbol=ast_data.get("Symbol"))

    to_visit = [(ast_data, root_node)]
    while to_visit:
        node_data, current_node = to_visit.pop()
        if "Children" not in node_data:
            continue
        for property_name, child_data in node_data["Children"].items():
            node = Node(child_data["Name"],
                        properties=properties_of(child_data),
                        symbol=child_data.get("Symbol"),
                        parent=current_node)
            # Children are passed as a sequence of nodes, so the single child
            # is wrapped in a 1-tuple rather than handed over bare.
            current_node.set_children_for_property(property_name, (node, ))
            to_visit.append((child_data, node))
    return root_node
def cnf_to_eqnet(clauses, parent=None, log_depth=False):
    """Build the tree for a conjunction of clauses.

    :param clauses: non-empty sequence of clauses; each one is converted with
        ``clause_to_tree``.
    :param parent: node the resulting subtree hangs under. When it is None
        (the outermost call) the tree is wrapped in a fresh ``Start`` node.
    :param log_depth: when true the clauses are split in the middle at every
        ``And`` node; otherwise the first clause is split off each time.
    :return: for the outermost call, a pair of ``list('whatever')`` and the
        ``Start`` node; for nested calls, the root node of the subtree.
    """
    assert clauses

    if parent is None:
        root = Node("Start", ("child", ))
        root.set_children_for_property(
            "child", [cnf_to_eqnet(clauses, root, log_depth)])
        return list('whatever'), root

    if len(clauses) == 1:
        return clause_to_tree(clauses[0], parent, log_depth)

    split_at = len(clauses) // 2 if log_depth else 1
    conjunction = Node('And', ('left', 'right'), parent=parent)
    halves = (('left', clauses[:split_at]), ('right', clauses[split_at:]))
    for side, part in halves:
        subtree = cnf_to_eqnet(part, conjunction, log_depth)
        conjunction.set_children_for_property(side, [subtree])
    return conjunction
def variable_to_tree(num: int, parent):
    """Turn a signed variable index into a literal node.

    The magnitude of ``num`` selects a letter (1 -> 'a', 2 -> 'b', ...), which
    must fall in 'a'..'z'. A negative ``num`` wraps the literal in a ``Not``
    node.

    :param num: signed variable index.
    :param parent: node the returned node is attached under.
    :return: the literal node, or the ``Not`` node holding it.
    """
    letter = chr(ord('a') - 1 + abs(num))
    assert 'a' <= letter <= 'z'

    if num < 0:
        negation = Node('Not', ('child', ), parent=parent)
        negated_literal = Node(letter, (), letter, negation)
        negation.set_children_for_property('child', [negated_literal])
        return negation

    return Node(letter, (), letter, parent)
def clause_to_tree(clause: List[int], parent, log_depth=False):
    """Build the tree for a disjunction of literals.

    :param clause: non-empty iterable of signed variable indices; each one is
        converted with ``variable_to_tree``.
    :param parent: node the resulting subtree hangs under.
    :param log_depth: when true the literals are split in the middle at every
        ``Or`` node; otherwise the first literal is split off each time.
    :return: the root node of the subtree.
    """
    assert clause
    literals = list(clause)

    if len(literals) == 1:
        return variable_to_tree(literals[0], parent)

    split_at = len(literals) // 2 if log_depth else 1
    disjunction = Node('Or', ('left', 'right'), parent=parent)
    halves = (('left', literals[:split_at]), ('right', literals[split_at:]))
    for side, part in halves:
        subtree = clause_to_tree(part, disjunction, log_depth)
        disjunction.set_children_for_property(side, [subtree])
    return disjunction
def tree_copy_with_start(tree: Node):
    """Return the tree rooted at a ``Start`` node.

    A tree whose root is already named ``Start`` is returned unchanged (the
    same object, not a copy). Otherwise a new ``Start`` node is created,
    carrying the symbol of the original root, and a node-by-node copy of the
    whole tree is attached under its ``child`` property.

    :param tree: root of the tree to wrap.
    :return: the original tree or the newly created ``Start`` node.
    """
    if tree.name == 'Start':
        return tree

    start = Node('Start', ('child', ), tree.symbol)
    top_copy = Node(tree.name, tree.properties, tree.symbol, parent=start)
    start.set_children_for_property('child', [top_copy])

    # Stack of (original node, its copy) pairs whose children are still to copy.
    pending = [(tree, top_copy)]
    while pending:
        source, clone = pending.pop()
        for prop in source.properties:
            source_children = source[prop]
            cloned_children = tuple(
                Node(c.name, c.properties, c.symbol, parent=clone)
                for c in source_children)
            clone.set_children_for_property(prop, cloned_children)
            pending.extend(zip(source_children, cloned_children))
    return start
print("Usage <outputFilenamePrefix>") sys.exit(-1) synthesized_expressions = defaultdict(lambda: (list(), 0)) num_times_returned_no_tree = 0 num_times_returned_duplicate = 0 def print_stats(): print("Generated None %s times, duplicates %s times" % (num_times_returned_no_tree, num_times_returned_duplicate)) print("Generated %s unique expressions (%s in total)" % (len(synthesized_expressions), sum(len(e[0]) for e in synthesized_expressions.values()))) max_num_elements = 500 # max num of expressions per semantically equivalent set to sample (reservoir sampling) tree_generator = generate_all_trees(Node('Start', ('child', ))) for i, tree in tqdm(enumerate(tree_generator)): if i % 5000 == 4999: print_stats() tokens = to_token_sequence(tree, []) expression = simplify_logic(parse_expr(''.join(tokens)), form='dnf') expression_str = str(expression) all_elements, count = synthesized_expressions[expression_str] if len(all_elements) < max_num_elements: # Reservoir sampling all_elements.append((tokens, tree)) else: idx = random.randint(0, count) if idx < max_num_elements: all_elements[idx] = tokens, tree synthesized_expressions[expression_str] = all_elements, count + 1
def setUp(self):
    """Build the fixture tree shared by the tests.

    Layout (property name in brackets)::

        Node1
          [A] Node2, Node3
          [B] Node4
                [D] Node5
          [C] Node6
    """
    self._root = Node("Node1", ["A", "B", "C"])

    self._c1 = Node("Node2", [], parent=self._root)
    self._c2 = Node("Node3", ["D"], parent=self._root)
    self._root.set_children_for_property("A", (self._c1, self._c2))

    self._c3 = Node("Node4", ["D"], parent=self._root)
    self._root.set_children_for_property("B", [self._c3])

    self._c4 = Node("Node5", [], parent=self._c3)
    # Trailing comma: the children argument is a 1-tuple, not a bare node.
    self._c3.set_children_for_property("D", (self._c4,))

    self._c5 = Node("Node6", [], parent=self._root)
    self._root.set_children_for_property("C", [self._c5])
class TestTree(unittest.TestCase):
    """Tests for Node: child positions, iteration order and common subtrees."""

    def setUp(self):
        """Build the fixture tree shared by the tests.

        Layout (property name in brackets)::

            Node1
              [A] Node2, Node3
              [B] Node4
                    [D] Node5
              [C] Node6
        """
        self._root = Node("Node1", ["A", "B", "C"])

        self._c1 = Node("Node2", [], parent=self._root)
        self._c2 = Node("Node3", ["D"], parent=self._root)
        self._root.set_children_for_property("A", (self._c1, self._c2))

        self._c3 = Node("Node4", ["D"], parent=self._root)
        self._root.set_children_for_property("B", [self._c3])

        self._c4 = Node("Node5", [], parent=self._c3)
        # Trailing comma: the children argument is a 1-tuple, not a bare node.
        self._c3.set_children_for_property("D", (self._c4,))

        self._c5 = Node("Node6", [], parent=self._root)
        self._root.set_children_for_property("C", [self._c5])

    def test_location(self):
        """parent_and_pos() reports (parent, property name, index in property)."""
        self.assertEqual(self._c1.parent_and_pos(), (self._root, "A", 0))
        self.assertEqual(self._c2.parent_and_pos(), (self._root, "A", 1))
        self.assertEqual(self._c3.parent_and_pos(), (self._root, "B", 0))
        self.assertEqual(self._c4.parent_and_pos(), (self._c3, "D", 0))

    def test_preorder(self):
        """Iterating a node visits the tree in pre-order."""
        preorder = list(self._root)
        self.assertEqual(preorder, [self._root, self._c1, self._c2,
                                    self._c3, self._c4, self._c5])

    def test_maximal_size_subtree(self):
        """maximal_common_subtree() finds the largest shared subtree."""
        # n1 = Node1(A: Node2, B: n3) with n3 = Node1(A: Node3, B: Node3).
        # Children are always given as explicit 1-tuples.
        n1 = Node("Node1", ("A", "B"))
        n2 = Node("Node2", ())
        n1.set_children_for_property("A", (n2,))
        n3 = Node("Node1", ("A", "B"))
        n1.set_children_for_property('B', (n3,))
        n4 = Node("Node3", ())
        n3.set_children_for_property('A', (n4,))
        n5 = Node("Node3", ())
        n3.set_children_for_property('B', (n5,))

        self.assertEqual(len(n1.maximal_common_subtree(n1)), 5)
        self.assertEqual(str(n1.maximal_common_subtree(n1)), str(n1))

        self.assertEqual(len(n3.maximal_common_subtree(n1)), 3)
        self.assertEqual(str(n3.maximal_common_subtree(n1)), str(n3))

        # A lone Node2 leaf.
        m1 = Node('Node2', ())
        self.assertEqual(len(m1.maximal_common_subtree(n1)), 1)
        self.assertEqual(len(m1.maximal_common_subtree(n2)), 1)
        self.assertIsNone(m1.maximal_common_subtree(n3))

        # m1 = Node1(A: Node4, B: Node3).
        m1 = Node("Node1", ("A", "B"))
        m2 = Node("Node4", ())
        m1.set_children_for_property('A', (m2,))
        m3 = Node("Node3", ())
        m1.set_children_for_property('B', (m3,))
        self.assertEqual(len(m1.maximal_common_subtree(n1)), 2)
        self.assertEqual(len(n1.maximal_common_subtree(m1)), 2)

    def test_print(self):
        """Placeholder; nothing is checked here."""
        pass
def test_maximal_size_subtree(self):
    """maximal_common_subtree() finds the largest shared subtree."""
    # n1 = Node1(A: Node2, B: n3) with n3 = Node1(A: Node3, B: Node3).
    # Children are always given as explicit 1-tuples.
    n1 = Node("Node1", ("A", "B"))
    n2 = Node("Node2", ())
    n1.set_children_for_property("A", (n2,))
    n3 = Node("Node1", ("A", "B"))
    n1.set_children_for_property('B', (n3,))
    n4 = Node("Node3", ())
    n3.set_children_for_property('A', (n4,))
    n5 = Node("Node3", ())
    n3.set_children_for_property('B', (n5,))

    self.assertEqual(len(n1.maximal_common_subtree(n1)), 5)
    self.assertEqual(str(n1.maximal_common_subtree(n1)), str(n1))

    self.assertEqual(len(n3.maximal_common_subtree(n1)), 3)
    self.assertEqual(str(n3.maximal_common_subtree(n1)), str(n3))

    # A lone Node2 leaf.
    m1 = Node('Node2', ())
    self.assertEqual(len(m1.maximal_common_subtree(n1)), 1)
    self.assertEqual(len(m1.maximal_common_subtree(n2)), 1)
    self.assertIsNone(m1.maximal_common_subtree(n3))

    # m1 = Node1(A: Node4, B: Node3).
    m1 = Node("Node1", ("A", "B"))
    m2 = Node("Node4", ())
    m1.set_children_for_property('A', (m2,))
    m3 = Node("Node3", ())
    m1.set_children_for_property('B', (m3,))
    self.assertEqual(len(m1.maximal_common_subtree(n1)), 2)
    self.assertEqual(len(n1.maximal_common_subtree(m1)), 2)
def to_eqnet(self, parent=None):
    """Convert this expression node, and everything below it, to a Node tree.

    :param parent: the Node the converted subtree is attached under. When it
        is None (the outermost call) a ``Start`` node is created, the
        converted tree is stored under its ``child`` property and the
        ``Start`` node is returned.
    :return: the ``Start`` node for the outermost call, otherwise the Node
        created for this expression node.
    :raises AssertionError: if the symbol is not an int, a key of
        ``BINARY_NONTERMINALS`` or ``'~'``.
    """
    if parent is None:
        parent = Node('Start', ('child', ))
        child = self.to_eqnet(parent)
        parent.set_children_for_property('child', (child, ))
        return parent

    # Exact type match, so bool and other int subclasses are not treated as
    # variable indices. Index 1 maps to 'a', 2 to 'b', and so on.
    if type(self.symbol) is int:
        return Node(chr(ord('a') - 1 + self.symbol), (), parent=parent)

    if self.symbol in BINARY_NONTERMINALS:
        node = Node(BINARY_NONTERMINALS[self.symbol], ('left', 'right'), parent=parent)
        left = self.children[0].to_eqnet(node)
        right = self.children[1].to_eqnet(node)
        node.set_children_for_property('left', (left, ))
        node.set_children_for_property('right', (right, ))
        return node

    if self.symbol == '~':
        node = Node('Not', ('child', ), parent=parent)
        child = self.children[0].to_eqnet(node)
        node.set_children_for_property('child', (child, ))
        return node

    # Raised explicitly: an ``assert False`` is removed under ``python -O``,
    # which would make this function silently return None.
    raise AssertionError("unknown symbol '{}'".format(self.symbol))