Exemple #1
0
def synthesize_random_trees(stop_probability: float, max_nodes=500):
    """Grow one random tree of bracket and ``SEQ`` nodes under a ``Start`` root.

    Open ``(node, property)`` slots are expanded from a stack.  Each slot is
    either closed with an ``Empty`` leaf or filled with a new node, whose own
    properties are pushed as new open slots.

    :param stop_probability: probability, strictly between 0 and 1, of closing
        a slot with an ``Empty`` leaf.  Slots belonging to ``SEQ`` or ``Start``
        nodes are never closed this way.
    :param max_nodes: once the node counter (which starts at 1 for the root
        and grows by one per non-``Empty`` node) exceeds this value, every
        remaining slot is closed with an ``Empty`` leaf.
    :return: the ``Start`` root node of the generated tree.
    """
    assert 0 < stop_probability < 1
    available_nodes = [('()', ('child',)), ('[]', ('child',)), ('{}', ('child',)), ('<>', ('child',))]
    # NOTE(review): the bracket type is drawn once, so every non-SEQ node of a
    # tree shares it. Confirm this is intended rather than a per-node draw.
    selected_node_type = available_nodes[random.randint(0, len(available_nodes) - 1)]
    seq_node_type = ('SEQ', ('child', 'next'))
    root = Node('Start', ('child',))

    num_generated = 1
    to_generate = [(root, 'child')]
    while to_generate:
        next_node, property_to_generate = to_generate.pop()
        # Both random numbers are drawn before the stop check so the random
        # stream is consumed identically whichever branch is taken.
        p_stop = random.random()
        seq_selection = random.random()
        may_stop_here = next_node.name not in ('SEQ', 'Start')
        if (p_stop < stop_probability and may_stop_here) or num_generated > max_nodes:
            empty_leaf = Node('Empty', (), parent=next_node)
            # Children are passed as a sequence; a parenthesised expression
            # without a trailing comma is just the node itself.
            next_node.set_children_for_property(property_to_generate, (empty_leaf,))
            continue

        num_generated += 1
        # One time in four (on average) a SEQ node is produced instead of the
        # tree's bracket type.
        current_node_type = seq_node_type if seq_selection < 0.25 else selected_node_type
        child_name, child_properties = current_node_type
        child = Node(child_name, child_properties, parent=next_node)
        next_node.set_children_for_property(property_to_generate, (child,))
        for child_property in child_properties:
            to_generate.append((child, child_property))
    return root
Exemple #2
0
def generate_all_trees(current_tree, max_tree_size=7):
    """Enumerate every complete tree reachable by filling the empty slots of ``current_tree``.

    A slot is a ``(node, property)`` pair whose child list is empty.  The first
    empty slot is filled, in turn, with every entry of the module-level
    ``all_symbols`` (as a property-less node) and ``non_terminals`` (as a node
    with that entry's properties), and the search recurses on each result.

    :param current_tree: the partially built tree; it is never mutated, every
        expansion works on a deep copy.
    :param max_tree_size: size bound; a branch is abandoned as soon as the
        number of nodes plus the number of still-empty slots exceeds
        ``max_tree_size + 1``.
    :return: a generator over the trees that have no empty slot left.
    """
    if len(current_tree) > max_tree_size + 1:
        return

    empty_positions = [(n, p) for n in current_tree for p in n.properties
                       if len(n[p]) == 0]
    # Every empty slot needs at least one more node, so this is a lower bound
    # on the size of any completion of the current tree.
    if len(current_tree) + len(empty_positions) > max_tree_size + 1:
        return
    if not empty_positions:
        yield current_tree
        return

    for symbol in all_symbols:
        expanded = _with_first_empty_slot_filled(current_tree, symbol, ())
        # The bound must be forwarded; otherwise recursive calls silently fall
        # back to the default and ignore the caller's limit.
        yield from generate_all_trees(expanded, max_tree_size)

    for non_terminal_name, _, properties in non_terminals:
        expanded = _with_first_empty_slot_filled(
            current_tree, non_terminal_name, properties)
        yield from generate_all_trees(expanded, max_tree_size)


def _with_first_empty_slot_filled(tree, child_name, child_properties):
    """Return a deep copy of ``tree`` whose first empty slot holds a new child node."""
    tree_copy = deepcopy(tree)
    # The slot is looked up again in the copy because the copy has its own
    # node objects.
    node, property_name = next(
        (n, p) for n in tree_copy for p in n.properties if len(n[p]) == 0)
    child = Node(child_name, child_properties, parent=node)
    node.set_children_for_property(property_name, (child, ))
    return tree_copy
def parse_ast(ast_data):
    """
    Given an AST convert it to Node representation.

    :param ast_data: dict describing the root node.  It must have a "Name"
        entry and may have "Symbol" and "Children" entries; "Children" maps
        each property name to the dict describing that property's child.
    :return: the root Node of the converted tree.
    """
    def node_fields(data):
        # Keyword arguments shared by the root and every descendant: the
        # property names are the keys of "Children" and the symbol defaults
        # to None.
        return {
            "properties": tuple(data.get("Children", ())),
            "symbol": data.get("Symbol"),
        }

    root_node = Node(ast_data["Name"], **node_fields(ast_data))
    to_visit = [(ast_data, root_node)]

    while to_visit:
        node_data, current_node = to_visit.pop()
        if "Children" not in node_data:
            continue
        for property_name, child_node in node_data["Children"].items():
            node = Node(child_node["Name"],
                        parent=current_node,
                        **node_fields(child_node))
            # Children are passed as a sequence, like every other caller
            # does, rather than as the bare node.
            current_node.set_children_for_property(property_name, (node, ))
            to_visit.append((child_node, node))

    return root_node
Exemple #4
0
def cnf_to_eqnet(clauses, parent=None, log_depth=False):
    """Convert a non-empty sequence of clauses into a tree of ``And`` nodes.

    :param clauses: non-empty sequence of clauses; each clause is converted
        by ``clause_to_tree``.
    :param parent: parent node of the subtree being built.  When ``None``
        (the top-level call) a ``Start`` node is created to wrap the result.
    :param log_depth: if true, split the clauses in half at every level;
        otherwise split off one clause at a time.
    :return: for the top-level call, a pair of ``list('whatever')`` and the
        ``Start`` node; for nested calls, the node of the subtree.
    """
    assert clauses

    if parent is None:
        # Top-level call: wrap the conjunction in a Start node.
        root = Node("Start", ("child", ))
        conjunction_tree = cnf_to_eqnet(clauses, root, log_depth)
        root.set_children_for_property("child", [conjunction_tree])
        return list('whatever'), root

    if len(clauses) == 1:
        return clause_to_tree(clauses[0], parent, log_depth)

    split_at = len(clauses) // 2 if log_depth else 1
    conjunction = Node('And', ('left', 'right'), parent=parent)
    for side, part in (('left', clauses[:split_at]),
                       ('right', clauses[split_at:])):
        subtree = cnf_to_eqnet(part, conjunction, log_depth)
        conjunction.set_children_for_property(side, [subtree])
    return conjunction
Exemple #5
0
def variable_to_tree(num: int, parent):
    """Build the node (or ``Not`` subtree) for one signed variable index.

    The absolute value selects the letter (1 -> 'a', 2 -> 'b', ...); a
    negative value wraps the letter node in a ``Not`` node.

    :param num: signed variable index whose absolute value must map into
        'a'..'z'.
    :param parent: parent node for the returned node.
    :return: the letter node, or the ``Not`` node containing it.
    """
    letter = chr(abs(num) + ord('a') - 1)
    assert 'a' <= letter <= 'z'

    if num < 0:
        negation = Node('Not', ('child', ), parent=parent)
        negated_literal = Node(letter, (), letter, negation)
        negation.set_children_for_property('child', [negated_literal])
        return negation

    return Node(letter, (), letter, parent)
Exemple #6
0
def clause_to_tree(clause: List[int], parent, log_depth=False):
    """Convert a non-empty clause of signed variable indices into a tree of ``Or`` nodes.

    :param clause: non-empty collection of signed variable indices; each one
        is converted by ``variable_to_tree``.
    :param parent: parent node for the returned subtree.
    :param log_depth: if true, split the literals in half at every level;
        otherwise split off one literal at a time.
    :return: the node of the subtree representing the clause.
    """
    assert clause
    literals = list(clause)

    if len(literals) == 1:
        return variable_to_tree(literals[0], parent)

    split_at = len(literals) // 2 if log_depth else 1
    disjunction = Node('Or', ('left', 'right'), parent=parent)
    for side, part in (('left', literals[:split_at]),
                       ('right', literals[split_at:])):
        subtree = clause_to_tree(part, disjunction, log_depth)
        disjunction.set_children_for_property(side, [subtree])
    return disjunction
Exemple #7
0
def tree_copy_with_start(tree: Node):
    """Return ``tree`` wrapped in a ``Start`` node, copying it node by node.

    A tree whose root is already named ``Start`` is returned as-is (not
    copied).  Otherwise every node is re-created with the same name,
    properties and symbol under a new ``Start`` root, which is created with
    the original root's symbol.

    :param tree: root node of the tree to wrap.
    :return: ``tree`` itself, or the new ``Start`` root of the copy.
    """
    if tree.name == 'Start':
        return tree

    start_node = Node('Start', ('child', ), tree.symbol)
    top_copy = Node(tree.name, tree.properties, tree.symbol,
                    parent=start_node)
    start_node.set_children_for_property('child', [top_copy])

    # Stack of (original node, its copy) pairs whose children still need
    # to be copied.
    pending = [(tree, top_copy)]
    while pending:
        source, duplicate = pending.pop()
        for prop in source.properties:
            duplicated_children = tuple(
                Node(kid.name, kid.properties, kid.symbol, parent=duplicate)
                for kid in source[prop])
            duplicate.set_children_for_property(prop, duplicated_children)
            pending.extend(zip(source[prop], duplicated_children))
    return start_node
Exemple #8
0
        print("Usage <outputFilenamePrefix>")
        sys.exit(-1)
    # Maps the string form of a simplified expression to a pair:
    # (list of sampled (tokens, tree) entries, number of trees seen so far
    # for that expression).
    synthesized_expressions = defaultdict(lambda: (list(), 0))
    # NOTE(review): these two counters are only read, never updated, in the
    # visible part of this code.
    num_times_returned_no_tree = 0
    num_times_returned_duplicate = 0

    def print_stats():
        """Print the two counters and the number of unique / total stored expressions."""
        print("Generated None %s times, duplicates %s times" %
              (num_times_returned_no_tree, num_times_returned_duplicate))
        print("Generated %s unique expressions (%s in total)" %
              (len(synthesized_expressions),
               sum(len(e[0]) for e in synthesized_expressions.values())))

    max_num_elements = 500  # max num of expressions per semantically equivalent set to sample (reservoir sampling)

    # Enumerate trees starting from a bare Start node with one empty 'child' slot.
    tree_generator = generate_all_trees(Node('Start', ('child', )))
    for i, tree in tqdm(enumerate(tree_generator)):
        # Report progress once every 5000 trees.
        if i % 5000 == 4999:
            print_stats()

        tokens = to_token_sequence(tree, [])
        # Trees are grouped by the string form of their simplified
        # expression (simplification requested with form='dnf').
        expression = simplify_logic(parse_expr(''.join(tokens)), form='dnf')
        expression_str = str(expression)
        all_elements, count = synthesized_expressions[expression_str]
        if len(all_elements) < max_num_elements:  # Reservoir sampling
            all_elements.append((tokens, tree))
        else:
            # random.randint includes both endpoints, so idx is drawn from
            # count + 1 values; the new entry replaces a stored one only
            # when idx falls inside the reservoir.
            idx = random.randint(0, count)
            if idx < max_num_elements:
                all_elements[idx] = tokens, tree
        # Store the pair back with the incremented count (tuples are immutable).
        synthesized_expressions[expression_str] = all_elements, count + 1
Exemple #9
0
    def setUp(self):
        """Build the fixture tree shared by the tests.

        Layout (property name in brackets)::

            Node1
              [A] Node2, Node3
              [B] Node4
                    [D] Node5
              [C] Node6
        """
        self._root = Node("Node1", ["A", "B", "C"])

        self._c1 = Node("Node2", [], parent=self._root)
        self._c2 = Node("Node3", ["D"], parent=self._root)
        self._root.set_children_for_property("A", (self._c1, self._c2))

        self._c3 = Node("Node4", ["D"], parent=self._root)
        self._root.set_children_for_property("B", [self._c3])

        self._c4 = Node("Node5", [], parent=self._c3)
        # A one-element tuple needs the trailing comma; without it the
        # parentheses are plain grouping and the bare node is passed.
        self._c3.set_children_for_property("D", (self._c4,))

        self._c5 = Node("Node6", [], parent=self._root)
        self._root.set_children_for_property("C", [self._c5])
Exemple #10
0
class TestTree(unittest.TestCase):
    """Unit tests for ``Node``: child positions, traversal order and common subtrees."""

    def setUp(self):
        """Build the fixture tree shared by the tests.

        Layout (property name in brackets)::

            Node1
              [A] Node2, Node3
              [B] Node4
                    [D] Node5
              [C] Node6
        """
        self._root = Node("Node1", ["A", "B", "C"])

        self._c1 = Node("Node2", [], parent=self._root)
        self._c2 = Node("Node3", ["D"], parent=self._root)
        self._root.set_children_for_property("A", (self._c1, self._c2))

        self._c3 = Node("Node4", ["D"], parent=self._root)
        self._root.set_children_for_property("B", [self._c3])

        self._c4 = Node("Node5", [], parent=self._c3)
        # A one-element tuple needs the trailing comma; without it the
        # parentheses are plain grouping and the bare node is passed.
        self._c3.set_children_for_property("D", (self._c4,))

        self._c5 = Node("Node6", [], parent=self._root)
        self._root.set_children_for_property("C", [self._c5])

    def test_location(self):
        """``parent_and_pos`` reports the parent, the property and the index within it."""
        self.assertEqual(self._c1.parent_and_pos(), (self._root, "A", 0))
        self.assertEqual(self._c2.parent_and_pos(), (self._root, "A", 1))
        self.assertEqual(self._c3.parent_and_pos(), (self._root, "B", 0))
        self.assertEqual(self._c4.parent_and_pos(), (self._c3, "D", 0))

    def test_preorder(self):
        """Iterating a node visits it first, then its descendants in property order."""
        preorder = list(self._root)
        self.assertEqual(preorder, [self._root, self._c1, self._c2, self._c3, self._c4, self._c5])

    def test_maximal_size_subtree(self):
        """``maximal_common_subtree`` returns the largest shared subtree, or None."""
        # Children are passed as explicit one-element tuples. ``tuple(node)``
        # would enumerate the node's whole subtree and equals ``(node,)``
        # only while the node has no children yet.
        n1 = Node("Node1", ("A", "B"))
        n2 = Node("Node2", ())
        n1.set_children_for_property("A", (n2,))
        n3 = Node("Node1", ("A", "B"))
        n1.set_children_for_property('B', (n3,))
        n4 = Node("Node3", ())
        n3.set_children_for_property('A', (n4,))
        n5 = Node("Node3", ())
        n3.set_children_for_property('B', (n5,))

        self.assertEqual(len(n1.maximal_common_subtree(n1)), 5)
        self.assertEqual(str(n1.maximal_common_subtree(n1)), str(n1))
        self.assertEqual(len(n3.maximal_common_subtree(n1)), 3)
        self.assertEqual(str(n3.maximal_common_subtree(n1)), str(n3))

        m1 = Node('Node2', ())
        self.assertEqual(len(m1.maximal_common_subtree(n1)), 1)
        self.assertEqual(len(m1.maximal_common_subtree(n2)), 1)
        self.assertIsNone(m1.maximal_common_subtree(n3))

        m1 = Node("Node1", ("A", "B"))
        m2 = Node("Node4", ())
        m1.set_children_for_property('A', (m2,))
        m3 = Node("Node3", ())
        m1.set_children_for_property('B', (m3,))
        self.assertEqual(len(m1.maximal_common_subtree(n1)), 2)
        self.assertEqual(len(n1.maximal_common_subtree(m1)), 2)

    def test_print(self):
        """Placeholder: no assertions yet."""
        pass
Exemple #11
0
    def test_maximal_size_subtree(self):
        """``maximal_common_subtree`` returns the largest shared subtree, or None."""
        # Children are passed as explicit one-element tuples. ``tuple(node)``
        # would enumerate the node's whole subtree and equals ``(node,)``
        # only while the node has no children yet; ``(node)`` is just the
        # bare node.
        n1 = Node("Node1", ("A", "B"))
        n2 = Node("Node2", ())
        n1.set_children_for_property("A", (n2,))
        n3 = Node("Node1", ("A", "B"))
        n1.set_children_for_property('B', (n3,))
        n4 = Node("Node3", ())
        n3.set_children_for_property('A', (n4,))
        n5 = Node("Node3", ())
        n3.set_children_for_property('B', (n5,))

        self.assertEqual(len(n1.maximal_common_subtree(n1)), 5)
        self.assertEqual(str(n1.maximal_common_subtree(n1)), str(n1))
        self.assertEqual(len(n3.maximal_common_subtree(n1)), 3)
        self.assertEqual(str(n3.maximal_common_subtree(n1)), str(n3))

        m1 = Node('Node2', ())
        self.assertEqual(len(m1.maximal_common_subtree(n1)), 1)
        self.assertEqual(len(m1.maximal_common_subtree(n2)), 1)
        self.assertIsNone(m1.maximal_common_subtree(n3))

        m1 = Node("Node1", ("A", "B"))
        m2 = Node("Node4", ())
        m1.set_children_for_property('A', (m2,))
        m3 = Node("Node3", ())
        m1.set_children_for_property('B', (m3,))
        self.assertEqual(len(m1.maximal_common_subtree(n1)), 2)
        self.assertEqual(len(n1.maximal_common_subtree(m1)), 2)
    def to_eqnet(self, parent=None):
        """Convert this expression node, recursively, into a ``Node`` tree.

        :param parent: parent ``Node`` of the subtree being built.  When
            ``None`` (the top-level call) a ``Start`` node is created, the
            converted tree is attached as its 'child', and the ``Start``
            node is returned.
        :return: the ``Start`` node for the top-level call, otherwise the
            ``Node`` built for this expression node.
        """
        if parent is None:
            start = Node('Start', ('child', ))
            converted = self.to_eqnet(start)
            start.set_children_for_property('child', (converted, ))
            return start

        symbol = self.symbol

        # Integer symbols are variables: 1 -> 'a', 2 -> 'b', ...
        if type(symbol) is int:
            return Node(chr(symbol + ord('a') - 1), (), parent=parent)

        if symbol in BINARY_NONTERMINALS:
            operator = Node(BINARY_NONTERMINALS[symbol], ('left', 'right'),
                            parent=parent)
            lhs = self.children[0].to_eqnet(operator)
            rhs = self.children[1].to_eqnet(operator)
            operator.set_children_for_property('left', (lhs, ))
            operator.set_children_for_property('right', (rhs, ))
            return operator

        if symbol == '~':
            negation = Node('Not', ('child', ), parent=parent)
            operand = self.children[0].to_eqnet(negation)
            negation.set_children_for_property('child', (operand, ))
            return negation

        assert False, "unknown symbol '{}'".format(self.symbol)