Beispiel #1
0
def optimize_file(data):
    """Re-parse one sample and refresh its AST, diff count and dependencies.

    The file ``data["filename"]`` is parsed through ``parse_file_server``
    using the ``"typescript"`` parser, with ``"remove_types"`` set and
    ``data["source_files"]`` passed as ``"deps"``.  The resulting AST is
    compared with the one already stored under ``data["ast"]`` and the
    dependency list is recomputed by ``optimize_deps``.

    Args:
        data: mapping providing the keys ``"filename"``, ``"source_files"``,
            ``"dependencies"`` and ``"ast"``.

    Returns:
        The same ``data`` object, with ``"dependencies"``, ``"ast"`` and
        ``"num_diffs"`` overwritten.

    Raises:
        AssertionError: if the parser returns ``None``.
    """
    # Wall-clock duration of the baseline parse; handed to optimize_deps.
    started_at = time.time()
    filename = data["filename"]
    parser_options = {
        "remove_types": True,
        "deps": data["source_files"],
    }
    fresh_ast = parse_file_server(filename,
                                  parser_name="typescript",
                                  data=parser_options)
    parse_seconds = time.time() - started_at
    assert fresh_ast is not None

    # Number of tree differences between the stored AST and the new parse.
    stored_tree = AstNode.fromJson(data["ast"], fields=["gold", "target"])
    fresh_tree = AstNode.fromJson(fresh_ast, fields=["gold", "target"])
    diff_count = stored_tree.num_tree_diffs(fresh_tree)

    data["dependencies"] = optimize_deps(
        filename,
        data["source_files"],
        data["dependencies"],
        fresh_ast,
        parse_seconds,
    )
    data["ast"] = fresh_ast
    data["num_diffs"] = diff_count
    return data
Beispiel #2
0
    def ternary_expr(self,
                     left: AstNode,
                     right: AstNode,
                     depth=0,
                     target_type=None,
                     parent=None):
        """Build a conditional expression ``(<generated>) ? left : right``.

        The condition is a parenthesized expression produced by
        ``gen_bin_expr(depth, target_type)``.  ``left`` and ``right`` are
        deep-copied into the new subtree; the inputs are not attached.

        Args:
            left: node copied into the branch after the question token.
            right: node copied into the branch after the colon token.
            depth: forwarded to ``gen_bin_expr`` for the condition.
            target_type: forwarded to ``gen_bin_expr`` for the condition.
            parent: parent given to the new conditional node.

        Returns:
            The new conditional-expression node.
        """
        conditional = self.cond_expr.copyNoChildren(parent=parent)

        # Condition: "(" <generated binary expression> ")".
        condition = self.paren_expr.copyNoChildren(parent=conditional)
        opening = self.open_paren_token.copyNoChildren(parent=condition)
        generated = self.gen_bin_expr(depth=depth,
                                      target_type=target_type,
                                      parent=condition)
        closing = self.close_paren_token.copyNoChildren(parent=condition)
        condition.children = [opening, generated, closing]

        when_true = left.deepCopy(parent=conditional)
        when_false = right.deepCopy(parent=conditional)
        question = self.question_token.copyNoChildren(parent=conditional)
        colon = self.colon_token.copyNoChildren(parent=conditional)
        conditional.children = [
            condition, question, when_true, colon, when_false
        ]

        # Pick randomly which of the two branches keeps its target values;
        # the other one gets its ids and fields wiped.  This ensures that the
        # number of nodes to predict and their order is the same for the
        # original and the modified tree.
        wiped_branch = random.choice([when_true, when_false])
        for descendant in wiped_branch.forEachNode():
            descendant.id = -1
            descendant.fields.clear()
        return conditional
Beispiel #3
0
 def find_params(function_def: AstNode):
     """Return the ``SyntaxList`` that directly follows the opening paren.

     Starts at ``function_def.down_first()`` and follows ``right()`` until a
     node of type ``"OpenParenToken"`` is found; the node to its right is
     returned.

     Raises:
         AssertionError: if ``down_first()`` is ``None`` or the node after the
             opening paren is missing or not a ``"SyntaxList"``.
     """
     cursor = function_def.down_first()
     assert cursor is not None
     while cursor.type != "OpenParenToken":
         cursor = cursor.right()
     assert cursor.has_right() and cursor.right().type == "SyntaxList"
     return cursor.right()
Beispiel #4
0
 def matches(self, node: AstNode):
     """Tell whether ``node`` is the ``SyntaxList`` of an object literal.

     True only when ``node`` has type ``"SyntaxList"``, ``node.left()`` is a
     ``"FirstPunctuation"`` node, and that node's ``up()`` is an
     ``"ObjectLiteralExpression"``.
     """
     if node.type == "SyntaxList":
         opening = node.left()
         if opening is not None and opening.type == "FirstPunctuation":
             owner = opening.up()
             return (owner is not None
                     and owner.type == "ObjectLiteralExpression")
     return False
Beispiel #5
0
    def apply(self, tree_id, node: AstNode):
        """Shuffle the non-comma children of ``node`` (currently disabled).

        The modification is deliberately switched off: after the precondition
        checks the method always raises ``AssertionError("unsound
        implementation")``.  The code below the raise is the retained
        reference implementation and is not executed.

        Args:
            tree_id: identifier of the tree; combined with ``node.id`` to form
                the key under which the modification would be recorded.
            node: node whose children would be shuffled; must satisfy
                ``self.matches(node)`` and have ``id != -1``.

        Raises:
            AssertionError: always.
        """
        assert self.matches(node)
        assert node.id != -1
        key = (tree_id, node.id)
        assert key not in self.applied_positions

        # Raised explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let the unsound code below
        # run.  Exception type and message are unchanged.
        raise AssertionError("unsound implementation")
        # TODO: the current implementation assumes that structural
        # modifications do not change the prediction order.  As a result,
        # reordering structural changes will result in wrong evaluation.
        # To fix this, we would need to compute a permutation that reorders
        # the predictions into the original order and apply it during
        # evaluation.

        properties = [
            child for child in node.children if child.type != "CommaToken"
        ]
        random.shuffle(properties)

        new_block = AstNode(idx=-1, type="SyntaxList", value=self.null_id)
        for prop in properties:
            if new_block.children:
                comma_token = AstNode(idx=-1, type="CommaToken", value=",")
                new_block.add_child(comma_token)
            new_block.add_child(prop)

        # remember where the change was applied such that it can be reverted
        # later
        self.applied_positions[key] = (node, new_block)
        node.swapNodes(new_block)
Beispiel #6
0
    def apply(self, tree_id, node: AstNode):
        """Swap ``node`` for an array-literal element access built around it.

        A random element count in ``[1, 4]`` and a random selected index below
        that count are drawn, ``gen_array_expression`` builds the replacement,
        and the ``(node, replacement)`` pair is stored in
        ``self.applied_positions`` under ``(tree_id, node.id)`` before
        ``node.swapNodes`` is called.

        Raises:
            AssertionError: if ``node.id`` is ``-1`` or the position has
                already been recorded.
        """
        # NOTE: self.matches(node) is not asserted here.
        assert node.id != -1
        position = (tree_id, node.id)
        assert position not in self.applied_positions

        num_elem = random.randint(1, 4)
        select_idx = random.randint(0, num_elem - 1)
        replacement = self.gen_array_expression(node, num_elem, select_idx)

        # remember where the change was applied such that it can be reverted
        # later
        self.applied_positions[position] = (node, replacement)
        node.swapNodes(replacement)
Beispiel #7
0
    def apply(self, tree_id, node: AstNode):
        """Swap ``node`` for a ternary expression that has it in both branches.

        ``self.expr_gen.ternary_expr`` is called with ``node`` as both the
        left and the right operand and a random condition depth in ``[0, 3]``.
        The ``(node, replacement)`` pair is stored in
        ``self.applied_positions`` under ``(tree_id, node.id)`` before
        ``node.swapNodes`` is called.

        Returns:
            The generated ternary expression node.

        Raises:
            AssertionError: if ``node.id`` is ``-1`` or the position has
                already been recorded.
        """
        # NOTE: self.matches(node) is not asserted here.
        assert node.id != -1
        position = (tree_id, node.id)
        assert position not in self.applied_positions

        condition_depth = random.randint(0, 3)
        replacement = self.expr_gen.ternary_expr(node, node, condition_depth)

        # remember where the change was applied such that it can be reverted
        # later
        self.applied_positions[position] = (node, replacement)
        node.swapNodes(replacement)
        return replacement
Beispiel #8
0
    def gen_constant(self, target_type=None, parent=None):
        """Create a random constant node.

        Args:
            target_type: key of ``self.constant_types_to_node_type``; drawn at
                random from ``self.constant_types`` when ``None``.
            parent: parent passed to the new node.

        Returns:
            A new ``AstNode`` whose ``idx`` and ``origin`` are both
            ``PositionIDs.ADVERSARIAL_CONSTANT``, with a randomly chosen node
            type for the target type and a random value taken from
            ``self.value_index.per_type_values`` for that node type.
        """
        chosen_type = target_type
        if chosen_type is None:
            chosen_type = random.choice(self.constant_types)

        candidate_kinds = self.constant_types_to_node_type[chosen_type]
        kind = random.choice(candidate_kinds)
        literal = random.choice(self.value_index.per_type_values[kind])

        constant = AstNode(idx=PositionIDs.ADVERSARIAL_CONSTANT,
                           parent=parent,
                           type=kind,
                           value=literal)
        constant.origin = PositionIDs.ADVERSARIAL_CONSTANT
        return constant
Beispiel #9
0
    def apply(self, tree_id, node: AstNode):
        """Append one or two generated expressions to an argument list.

        The argument list is the ``"SyntaxList"`` directly to the right of the
        first ``"OpenParenToken"`` reached from ``node.down_first()`` via
        ``right()``.  A deep copy of it is extended with comma-separated
        expressions from ``self.expr_gen.gen_bin_expr`` (random depth in
        ``[0, 2]``) and then swapped in for the original list.

        Raises:
            AssertionError: if ``node`` does not match, the argument list
                cannot be located, its id is ``-1``, or the position has
                already been recorded.
        """
        assert self.matches(node)

        # Locate the argument list that follows the opening paren.
        cursor = node.down_first()
        assert cursor is not None
        while cursor.type != "OpenParenToken":
            cursor = cursor.right()
        assert cursor.has_right() and cursor.right().type == "SyntaxList"
        original_args = cursor.right()

        extended_args = original_args.deepCopy(parent=original_args.parent)
        for _ in range(random.randint(1, 2)):
            # Separate from whatever is already in the list.
            if extended_args.children:
                extended_args.add_child(
                    AstNode(idx=-1, type="CommaToken", value=","))
            expr_depth = random.randint(0, 2)
            extended_args.add_child(
                self.expr_gen.gen_bin_expr(depth=expr_depth))

        assert original_args.id != -1
        position = (tree_id, original_args.id)
        assert position not in self.applied_positions
        # remember where the change was applied such that it can be reverted
        # later
        self.applied_positions[position] = (original_args, extended_args)
        original_args.swapNodes(extended_args)
Beispiel #10
0
 def test_single_node(self):
     """A one-entry JSON list yields a lone root without value or parent."""
     root_spec = {"id": 0, "type": "Root"}
     root = AstNode.fromJson([root_spec])

     # size of the tree
     self.assertEqual(len(root), 1)
     # attributes of the root itself
     self.assertEqual(root.type, "Root")
     self.assertEqual(root.value, None)
     # links to other nodes
     self.assertEqual(len(root.children), 0)
     self.assertEqual(root.parent, None)
Beispiel #11
0
    def matches(self, node: AstNode):
        """Tell whether ``node`` is a candidate for this replacement.

        A node matches when any of the following holds:

        * ``AdversarialNodeReplacement.is_constant(node)`` is true;
        * it is a ``PropertyAccessExpression``;
        * it is child 0 of a ``PropertyAccessExpression``;
        * it is a child of a ``BinaryExpression`` at any position other
          than 1;
        * it is a ``BinaryExpression`` whose parent is not an
          ``ExpressionStatement``;
        * it is child 2 of a ``PropertyAssignment``;
        * its left sibling is a ``ReturnKeyword`` or ``FirstAssignment``.
        """
        if (AdversarialNodeReplacement.is_constant(node)
                or node.type == "PropertyAccessExpression"):
            return True

        parent = node.up()
        if parent is not None:
            pos = node.pos_in_parent()
            matched_by_parent = (
                # x.y -> ((...) ? x : x).y
                (pos == 0 and parent.type == "PropertyAccessExpression")
                # x + y -> ((...) ? x : x) + y
                or (pos != 1 and parent.type == "BinaryExpression")
                or (node.type == "BinaryExpression"
                    and parent.type != "ExpressionStatement")
                # { y : x }
                or (pos == 2 and parent.type == "PropertyAssignment"))
            if matched_by_parent:
                return True

        # return x -> return (...) ? x : x
        if node.has_left() and node.left().type in ("ReturnKeyword",
                                                    "FirstAssignment"):
            return True
        return False
Beispiel #12
0
 def get_scope_type(node: AstNode):
     """Return the type name(s) recorded in ``sample.target`` for a scope.

     For a ``ClassExpression`` the entry of ``node`` itself is used.
     Otherwise the children are scanned from ``node.down_first()`` to the
     right until a ``ClassKeyword`` or ``InterfaceKeyword`` is found; if none
     is found, the last node reached is used.  ``sample`` is taken from the
     enclosing scope.

     Returns:
         ``[raw_type]``, or, when ``raw_type`` contains ``"typeof"``,
         ``[raw_type, <text after its last space>]`` (e.g. ``typeof Foo``
         gives ``[typeof Foo, Foo]``).
     """
     if node.type == "ClassExpression":
         typed_node = node
     else:
         typed_node = node.down_first()
         while (typed_node.type not in ["ClassKeyword", "InterfaceKeyword"]
                and typed_node.has_right()):
             typed_node = typed_node.right()

     raw_type = sample.target[typed_node.id]
     if "typeof" not in raw_type:
         return [raw_type]
     # Also expose the bare name that follows "typeof".
     return [raw_type, raw_type.split(" ")[-1]]
Beispiel #13
0
    def apply(self, tree_id, node: AstNode):
        """Append one to three extra parameters to a parameter list.

        The parameter list is the ``"SyntaxList"`` directly to the right of
        the first ``"OpenParenToken"`` reached from ``node.down_first()`` via
        ``right()``.  A deep copy of it is extended with comma-separated
        ``Parameter`` nodes named ``param0``, ``param1``, ... and then swapped
        in for the original list.

        Raises:
            AssertionError: if ``node`` does not match, the parameter list
                cannot be located, its id is ``-1``, or the position has
                already been recorded.
        """
        assert self.matches(node)

        # Locate the parameter list that follows the opening paren.
        cursor = node.down_first()
        assert cursor is not None
        while cursor.type != "OpenParenToken":
            cursor = cursor.right()
        assert cursor.has_right() and cursor.right().type == "SyntaxList"
        original_params = cursor.right()

        extended_params = original_params.deepCopy(
            parent=original_params.parent)
        extra_count = random.randint(1, 3)
        for index in range(extra_count):
            new_param = AstNode(idx=-1, type="Parameter", value=self.null_id)
            # TODO: enable adversarial modifications of the added identifier
            new_param.add_child(
                AstNode(idx=-1, type="Identifier", value=f"param{index}"))

            # Separate from whatever is already in the list.
            if extended_params.children:
                extended_params.add_child(
                    AstNode(idx=-1, type="CommaToken", value=","))
            extended_params.add_child(new_param)

        assert original_params.id != -1
        position = (tree_id, original_params.id)
        assert position not in self.applied_positions
        # remember where the change was applied such that it can be reverted
        # later
        self.applied_positions[position] = (original_params, extended_params)
        original_params.swapNodes(extended_params)
Beispiel #14
0
    def apply(self, tree_id, node: AstNode):
        """Insert a generated expression statement next to ``node``.

        The statement is a ``gen_bin_expr`` expression (random depth in
        ``[0, 2]``) followed by a semicolon token.  It is added to
        ``node.parent`` at the position of ``node`` or one past it, chosen at
        random, and ``(node, statement, is_after)`` is appended to
        ``self.applied_modifications``.

        Raises:
            AssertionError: if ``node`` does not match, ``node.id`` is ``-1``,
                or ``(tree_id, node.id)`` is in ``self.applied_positions``.
        """
        assert self.matches(node)

        # True: insert after node; False: insert before it.
        insert_after = random.choice([True, False])

        statement = AstNode(idx=-1,
                            type="ExpressionStatement",
                            value=self.null_id)
        generated = self.expr_gen.gen_bin_expr(depth=random.randint(0, 2))
        semicolon = AstNode(idx=-1, type="SemicolonToken", value=";")
        statement.add_child(generated)
        statement.add_child(semicolon)

        assert node.id != -1
        position = (tree_id, node.id)
        # NOTE(review): the position is checked against applied_positions, but
        # this method only records into applied_modifications -- confirm that
        # applied_positions is maintained elsewhere.
        assert position not in self.applied_positions
        # remember where the change was applied such that it can be reverted
        # later
        self.applied_modifications.append((node, statement, insert_after))
        offset = 1 if insert_after else 0
        node.parent.add_child(statement, pos=node.pos_in_parent() + offset)
Beispiel #15
0
    def test_single_child(self):
        """A root listing one child id is linked to that child both ways."""
        root_spec = {"id": 0, "type": "Root", "children": [1]}
        child_spec = {"id": 1, "type": "Identifier", "value": "x"}
        root = AstNode.fromJson([root_spec, child_spec])

        # the root
        self.assertEqual(len(root), 2)
        self.assertEqual(root.type, "Root")
        self.assertEqual(root.value, None)
        self.assertEqual(len(root.children), 1)
        self.assertEqual(root.parent, None)

        # its only child
        only_child = root.children[0]
        self.assertEqual(only_child.type, "Identifier")
        self.assertEqual(only_child.value, "x")
        self.assertEqual(only_child.parent, root)
Beispiel #16
0
    def gen_bin_expr(self, depth=0, target_type=None, parent=None):
        """Generate a random binary expression tree, or a constant.

        Args:
            depth: ``0`` returns a single constant from ``gen_constant``.
                For a positive depth a binary-expression node is built whose
                two operands are generated recursively, each with its own
                depth drawn from ``random.randint(0, depth - 1)``.  Expected
                to be non-negative.
            target_type: constant type handed to ``gen_constant``.  When
                ``None`` and ``depth`` is positive, one is drawn from
                ``self.constant_types`` and shared by both operands.
            parent: parent passed to the returned node.

        Returns:
            The generated ``AstNode``.
        """
        if depth == 0:
            return self.gen_constant(target_type=target_type, parent=parent)

        if target_type is None:
            target_type = random.choice(self.constant_types)

        bin_op = AstNode(idx=-1,
                         parent=parent,
                         type=self.bin_expr_type,
                         value=self.null_id)
        bin_operand_type, bin_operand_value = random.choice(
            self.value_index.bin_ops)
        # depth is non-zero here (the zero case returned above), so the
        # operands are always generated recursively with a smaller depth.
        bin_op.children = [
            self.gen_bin_expr(random.randint(0, depth - 1),
                              target_type=target_type,
                              parent=bin_op),
            AstNode(
                idx=-1,
                parent=bin_op,
                type=bin_operand_type,
                value=bin_operand_value,
            ),
            self.gen_bin_expr(random.randint(0, depth - 1),
                              target_type=target_type,
                              parent=bin_op),
        ]
        return bin_op
Beispiel #17
0
    def __init__(self, value_index: NodeValueIndexStr):
        """Store the value index and create the reusable template nodes.

        Args:
            value_index: kept as ``self.value_index`` for later lookups.
        """
        self.value_index = value_index

        # Constant type -> node types that can represent such a constant.
        self.constant_types_to_node_type = {
            "string":
            ["StringLiteral", "TemplateExpression", "FirstTemplateToken"],
            "boolean": ["TrueKeyword", "FalseKeyword"],
            "number": ["FirstLiteralToken"],
        }
        self.constant_types = list(self.constant_types_to_node_type)

        self.null_id = "<null>"
        self.bin_expr_type = "BinaryExpression"

        def template(node_type, value):
            # Every template node is created with idx -1.
            return AstNode(idx=-1, type=node_type, value=value)

        # Inner nodes carry the null value, tokens carry their source text.
        self.cond_expr = template("ConditionalExpression", self.null_id)
        self.paren_expr = template("ParenthesizedExpression", self.null_id)
        self.open_paren_token = template("OpenParenToken", "(")
        self.close_paren_token = template("CloseParenToken", ")")
        self.question_token = template("QuestionToken", "?")
        self.colon_token = template("ColonToken", ":")
Beispiel #18
0
    def gen_array_expression(self, node, num_elem, select_idx):
        """Build ``[copy, copy, ...][select_idx]`` from copies of ``node``.

        Args:
            node: node that is deep-copied ``num_elem`` times into the array
                literal.
            num_elem: number of array elements.
            select_idx: index placed in the element access; only the copy at
                this position keeps its ids and fields.

        Returns:
            The new ``ElementAccessExpression`` node.

        Raises:
            AssertionError: unless ``0 <= select_idx < num_elem``.
        """
        assert 0 <= select_idx < num_elem

        def fresh(node_type, value):
            # All generated nodes are created with idx -1.
            return AstNode(idx=-1, type=node_type, value=value)

        access = fresh("ElementAccessExpression", self.null_id)
        literal = fresh("ArrayLiteralExpression", self.null_id)
        elements = fresh("SyntaxList", self.null_id)

        for position in range(num_elem):
            element = node.deepCopy()
            elements.add_child(element)
            if position == select_idx:
                continue
            # Wipe every copy except the selected one: this ensures that the
            # number of nodes to predict and their order is the same for the
            # original and modified tree.
            for descendant in element.forEachNode():
                descendant.id = -1
                descendant.fields.clear()

        # "[" elements "]"
        literal.add_child(fresh("OpenBracketToken", "["))
        literal.add_child(elements)
        literal.add_child(fresh("CloseBracketToken", "]"))

        # literal "[" select_idx "]"
        access.add_child(literal)
        access.add_child(fresh("OpenBracketToken", "["))
        # NOTE(review): the index is stored as an int, while the other token
        # values here are strings -- confirm consumers accept that.
        access.add_child(fresh("FirstLiteralToken", select_idx))
        access.add_child(fresh("CloseBracketToken", "]"))
        return access
Beispiel #19
0
class ExpressionGenerator:
    """Generates random AST fragments: constants, binary and ternary
    expressions.

    Literal values and binary operators are drawn from ``value_index``
    (``per_type_values`` and ``bin_ops``); all random choices use the
    module-level ``random`` functions.
    """

    def __init__(self, value_index: NodeValueIndexStr):
        """Store the value index and create the reusable template nodes.

        Args:
            value_index: provides ``per_type_values`` (read by
                ``gen_constant``) and ``bin_ops`` (read by ``gen_bin_expr``).
        """
        self.value_index = value_index

        # Constant type -> node types that can represent such a constant.
        self.constant_types_to_node_type = {
            "string":
            ["StringLiteral", "TemplateExpression", "FirstTemplateToken"],
            "boolean": ["TrueKeyword", "FalseKeyword"],
            "number": ["FirstLiteralToken"],
        }

        self.constant_types = list(self.constant_types_to_node_type.keys())

        self.null_id = "<null>"
        self.bin_expr_type = "BinaryExpression"

        # Template nodes; ternary_expr copies them via copyNoChildren.
        self.cond_expr = AstNode(idx=-1,
                                 type="ConditionalExpression",
                                 value=self.null_id)
        self.paren_expr = AstNode(idx=-1,
                                  type="ParenthesizedExpression",
                                  value=self.null_id)
        self.open_paren_token = AstNode(idx=-1,
                                        type="OpenParenToken",
                                        value="(")
        self.close_paren_token = AstNode(idx=-1,
                                         type="CloseParenToken",
                                         value=")")
        self.question_token = AstNode(idx=-1, type="QuestionToken", value="?")
        self.colon_token = AstNode(idx=-1, type="ColonToken", value=":")

    def gen_constant(self, target_type=None, parent=None):
        """Create a random constant node.

        Args:
            target_type: key of ``self.constant_types_to_node_type``; drawn at
                random from ``self.constant_types`` when ``None``.
            parent: parent passed to the new node.

        Returns:
            A new ``AstNode`` whose ``idx`` and ``origin`` are both
            ``PositionIDs.ADVERSARIAL_CONSTANT``.
        """
        if target_type is None:
            target_type = random.choice(self.constant_types)

        node_type = random.choice(
            self.constant_types_to_node_type[target_type])
        node_value = random.choice(self.value_index.per_type_values[node_type])
        astnode = AstNode(
            idx=PositionIDs.ADVERSARIAL_CONSTANT,
            parent=parent,
            type=node_type,
            value=node_value,
        )
        astnode.origin = PositionIDs.ADVERSARIAL_CONSTANT
        return astnode

    def gen_bin_expr(self, depth=0, target_type=None, parent=None):
        """Generate a random binary expression tree, or a constant.

        Args:
            depth: ``0`` returns a single constant from ``gen_constant``.
                For a positive depth a binary-expression node is built whose
                two operands are generated recursively, each with its own
                depth drawn from ``random.randint(0, depth - 1)``.  Expected
                to be non-negative.
            target_type: constant type handed to ``gen_constant``.  When
                ``None`` and ``depth`` is positive, one is drawn from
                ``self.constant_types`` and shared by both operands.
            parent: parent passed to the returned node.

        Returns:
            The generated ``AstNode``.
        """
        if depth == 0:
            return self.gen_constant(target_type=target_type, parent=parent)

        if target_type is None:
            target_type = random.choice(self.constant_types)

        bin_op = AstNode(idx=-1,
                         parent=parent,
                         type=self.bin_expr_type,
                         value=self.null_id)
        bin_operand_type, bin_operand_value = random.choice(
            self.value_index.bin_ops)
        # depth is non-zero here (the zero case returned above), so the
        # operands are always generated recursively with a smaller depth.
        bin_op.children = [
            self.gen_bin_expr(random.randint(0, depth - 1),
                              target_type=target_type,
                              parent=bin_op),
            AstNode(
                idx=-1,
                parent=bin_op,
                type=bin_operand_type,
                value=bin_operand_value,
            ),
            self.gen_bin_expr(random.randint(0, depth - 1),
                              target_type=target_type,
                              parent=bin_op),
        ]
        return bin_op

    def ternary_expr(self,
                     left: AstNode,
                     right: AstNode,
                     depth=0,
                     target_type=None,
                     parent=None):
        """Build a conditional expression ``(<generated>) ? left : right``.

        The condition is a parenthesized expression produced by
        ``gen_bin_expr(depth, target_type)``.  ``left`` and ``right`` are
        deep-copied into the new subtree.

        Args:
            left: node copied into the branch after the question token.
            right: node copied into the branch after the colon token.
            depth: forwarded to ``gen_bin_expr`` for the condition.
            target_type: forwarded to ``gen_bin_expr`` for the condition.
            parent: parent given to the new conditional node.

        Returns:
            The new conditional-expression node.
        """
        root = self.cond_expr.copyNoChildren(parent=parent)

        paren_expr = self.paren_expr.copyNoChildren(parent=root)
        paren_expr.children = [
            self.open_paren_token.copyNoChildren(parent=paren_expr),
            self.gen_bin_expr(depth=depth,
                              target_type=target_type,
                              parent=paren_expr),
            self.close_paren_token.copyNoChildren(parent=paren_expr),
        ]

        left = left.deepCopy(parent=root)
        right = right.deepCopy(parent=root)
        root.children = [
            paren_expr,
            self.question_token.copyNoChildren(parent=root),
            left,
            self.colon_token.copyNoChildren(parent=root),
            right,
        ]
        # Pick randomly which of left/right should keep target values; the
        # other branch gets its ids and fields wiped.  This ensures that the
        # number of nodes to predict and their order is the same for the
        # original and modified tree.
        clear_node = random.choice([left, right])
        for node in clear_node.forEachNode():
            node.id = -1
            node.fields.clear()
        return root
Beispiel #20
0
    def apply(self, tree_id, node: AstNode):
        """Insert a generated object-literal statement next to ``node``.

        The statement has the shape ``{ k: v, ... };`` with one to five
        properties.  Each key and each value is either a constant from
        ``self.expr_gen.gen_constant()`` or an ``Identifier`` whose value is
        drawn from the identifier values found under ``node.parent``.  The
        statement is added to ``node.parent`` at the position of ``node`` or
        one past it, chosen at random, and ``(node, statement, is_after)`` is
        appended to ``self.applied_modifications``.

        Raises:
            AssertionError: if ``node`` does not match, ``node.id`` is ``-1``,
                or ``(tree_id, node.id)`` is in ``self.applied_positions``.
        """
        assert self.matches(node)

        # whether the expression is added before or after
        is_after = random.choice([True, False])

        # Identifier values occurring under the parent, usable as keys/values.
        values = list({
            n.value
            for n in node.parent.forEachNode() if n.type == "Identifier"
        })

        def random_operand():
            # Constant when no identifiers exist or on a coin flip; the
            # short-circuit skips the coin flip when `values` is empty.
            if not values or random.random() > 0.5:
                return self.expr_gen.gen_constant()
            return AstNode(idx=-1,
                           type="Identifier",
                           value=random.choice(values))

        root = AstNode(idx=-1, type="ExpressionStatement", value=self.null_id)
        obj = AstNode(idx=-1,
                      type="ObjectLiteralExpression",
                      value=self.null_id)
        root.add_child(obj)
        obj.add_child(AstNode(idx=-1, type="FirstPunctuation", value="{"))
        block = AstNode(idx=-1, type="SyntaxList", value=self.null_id)
        obj.add_child(block)

        for i in range(random.randint(1, 5)):
            if i != 0:
                block.add_child(AstNode(idx=-1, type="CommaToken", value=","))

            prop = AstNode(idx=-1,
                           type="PropertyAssignment",
                           value=self.null_id)
            prop.add_child(random_operand())
            prop.add_child(AstNode(idx=-1, type="ColonToken", value=":"))
            prop.add_child(random_operand())
            block.add_child(prop)

        # The closing brace belongs to the object literal itself, as a sibling
        # of the opening brace and the property list -- not inside the list.
        obj.add_child(AstNode(idx=-1, type="CloseBraceToken", value="}"))

        root.add_child(AstNode(idx=-1, type="SemicolonToken", value=";"))

        assert node.id != -1
        key = (tree_id, node.id)
        # NOTE(review): the key is checked against applied_positions, but this
        # method only records into applied_modifications -- confirm that
        # applied_positions is maintained elsewhere.
        assert key not in self.applied_positions
        # remember where the change was applied such that it can be reverted
        # later
        self.applied_modifications.append((node, root, is_after))
        node.parent.add_child(root, pos=node.pos_in_parent() + int(is_after))