Ejemplo n.º 1
0
    def _split_vnodes_by_lines(vnodes: List[VirtualNode]) -> Iterator:
        """
        Break every multi-line VirtualNode into a chain of VirtualNode-s, one per line.

        Only nodes with `y is None` whose value spans more than one line are split; all the
        other nodes are yielded untouched. A line break at the very beginning of a value is
        not emitted alone: it is glued to the text of the line that follows it.

        :param vnodes: list of VirtualNode-s to process.
        :return: iterator over the resulting VirtualNode-s in the original order.
        """
        # LIFO of nodes still to process; reversed so that pop() returns them in order.
        pending = vnodes[::-1]
        while pending:
            current = pending.pop()
            text = current.value
            pieces = text.splitlines()
            if current.y is None and len(pieces) > 1:
                origin = current.start
                if pieces[0]:
                    # the head is the text before the first line break, it stays on its line
                    head = pieces[0]
                    split_pos = Position(offset=origin.offset + len(head),
                                         line=origin.line,
                                         col=origin.col + len(head))
                else:
                    # the value starts with a line break: attach it to the following line
                    following = pieces[1]
                    head = text.splitlines(keepends=True)[0] + following
                    split_pos = Position(offset=origin.offset + len(head),
                                         line=origin.line + 1,
                                         col=1 + len(following))
                tail = text[len(head):]
                if tail:
                    # the remainder may still span several lines, so it is processed again
                    pending.append(VirtualNode(value=tail, start=split_pos, end=current.end,
                                               node=current.node))
                yield VirtualNode(value=head, start=origin, end=split_pos, node=current.node)
            else:
                yield current
Ejemplo n.º 2
0
    def _add_noops(self, vnodes: Sequence[VirtualNode], path: str, index_labels: bool = False,
                   ) -> List[VirtualNode]:
        """
        Add CLS_NOOP nodes in between tokens without labeled nodes to allow for insertions.

        A noop node is an empty-valued `VirtualNode` labeled with CLS_NOOP. It is inserted:

        * before the first node, if the first node is unlabeled (`y is None`);
        * between two consecutive unlabeled nodes, unless the first one of the pair is an \
          accumulated indentation node;
        * after the last node, if the last node is unlabeled.

        :param vnodes: The sequence of `VirtualNode`-s to augment with noop nodes.
        :param path: path to file.
        :param index_labels: Whether to index labels to define output classes or not. \
                             It is not read by this method.
        :return: The augmented `VirtualNode`-s sequence.
        """
        augmented_vnodes = []
        noop_label = (CLASS_INDEX[CLS_NOOP],)
        if not len(vnodes):
            return augmented_vnodes
        if vnodes[0].y is None:
            augmented_vnodes.append(VirtualNode(value="", start=Position(0, 1, 1),
                                                end=Position(0, 1, 1), y=noop_label, path=path))
        # Walk over the pairs of neighbours; the last node is never the first item of a pair.
        for vnode, next_vnode in zip(vnodes, islice(vnodes, 1, None)):
            augmented_vnodes.append(vnode)
            if vnode.y is None and not vnode.is_accumulated_indentation and next_vnode.y is None:
                augmented_vnodes.append(VirtualNode(value="", start=vnode.end, end=vnode.end,
                                                    y=noop_label, path=path))
        # Append the last node explicitly. Relying on the loop variable `next_vnode` here fails
        # with UnboundLocalError when `vnodes` has a single element and the loop never runs.
        augmented_vnodes.append(vnodes[-1])
        if augmented_vnodes[-1].y is None:
            augmented_vnodes.append(VirtualNode(value="", start=vnodes[-1].end, end=vnodes[-1].end,
                                                y=noop_label, path=path))
        return augmented_vnodes
Ejemplo n.º 3
0
 def test_revert_indentation_change(self):
     """
     Check `CodeGenerator.revert_indentation_change` on several label sequences.

     Each scenario is (node value, class names of the node labels, expectation). A string
     expectation is the value the call must return; anything else is the exception type
     the call must raise.
     """
     scenarios = [
         ("\n    ", (cls.CLS_NEWLINE, cls.CLS_SPACE_INC, cls.CLS_SPACE_INC), "\n  "),
         ("\n    ", (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC, cls.CLS_SPACE_DEC), "\n      "),
         ("\n\t ", (cls.CLS_NEWLINE, cls.CLS_TAB_INC, cls.CLS_SPACE_INC), "\n"),
         ("\n    ", (cls.CLS_NEWLINE, cls.CLS_TAB_INC, cls.CLS_TAB_INC),
          InapplicableIndentation),
         ("   ", (cls.CLS_SPACE, cls.CLS_SPACE_INC, cls.CLS_SPACE_INC), ValueError),
     ]
     for text, class_names, expected in scenarios:
         labels = tuple(cls.CLASS_INDEX[name] for name in class_names)
         node = VirtualNode(text, Position(0, 1, 1),
                            Position(len(text), 1, len(text) + 1), y=labels)
         if not isinstance(expected, str):
             # the expectation is an exception type
             with self.assertRaises(expected):
                 CodeGenerator.revert_indentation_change(node)
             continue
         self.assertEqual(CodeGenerator.revert_indentation_change(node), expected)
Ejemplo n.º 4
0
 def test_compute_labels_mappings(self):
     """
     Check the label mappings built by `_compute_labels_mappings` with a support cutoff of 2.

     The input holds the same node labeled (1,) twice, one unlabeled node and single nodes
     labeled (2,) and (3,); only (1,) is expected to be present in the resulting mappings.
     """
     start, end = Position(1, 1, 1), Position(10, 2, 1)
     # the very same object is listed twice, exactly as list multiplication would do
     repeated = VirtualNode("", start, end, y=(1,))
     vnodes = [
         repeated,
         repeated,
         VirtualNode("", start, end),
         VirtualNode("", start, end, y=(2,)),
         VirtualNode("", start, end, y=(3,)),
     ]
     extractor = self.extractor
     extractor.cutoff_label_support = 2
     extractor._compute_labels_mappings(vnodes)
     self.assertEqual(extractor.labels_to_class_sequences, [(1,)])
     self.assertEqual(extractor.class_sequences_to_labels, {(1,): 0})
Ejemplo n.º 5
0
def _to_position(raw_lines_data, _lines_start_offset, offset):
    """
    Convert an absolute offset to a `Position` with 1-based line and column numbers.

    :param raw_lines_data: lines of the text indexed by the 0-based line number.
    :param _lines_start_offset: array with the start offset of each line; it is compared \
                                element-wise with `offset`, so presumably a numpy array.
    :param offset: absolute offset to convert.
    :return: `Position(offset, line, column)`.
    """
    # The target line precedes the first line which starts after the offset.
    row = numpy.argmax(_lines_start_offset > offset) - 1
    column = offset - _lines_start_offset[row]
    text = raw_lines_data[row]
    # An offset located right after a line which ends with a line break belongs to the
    # beginning of the next line.
    if len(text) == column and text.splitlines()[0] != text:
        row, column = row + 1, 0
    return Position(offset, row + 1, column + 1)
Ejemplo n.º 6
0
 def _class_seq_to_vnodes(value, start, end, current_class_seq, path):
     """
     Generate one or two `VirtualNode`-s for a value labeled with a sequence of classes.

     If the sequence has NEWLINE_INDEX somewhere after its first element, both the value
     and the sequence are cut at the index of the first NEWLINE_INDEX and two nodes are
     generated. Otherwise a single node with the whole value and sequence is generated.

     :param value: text of the node.
     :param start: start position of the value.
     :param end: end position of the value.
     :param current_class_seq: sequence of class indices which labels the value.
     :param path: path to the file, passed to the created nodes.
     :return: generator of `VirtualNode`-s.
     """
     must_split = (NEWLINE_INDEX in current_class_seq
                   and current_class_seq[0] != NEWLINE_INDEX)
     if not must_split:
         yield VirtualNode(value=value, start=start, end=end,
                           y=tuple(current_class_seq), path=path)
         return
     # The same index cuts the value and the classes; the head stays on the start line.
     cut = current_class_seq.index(NEWLINE_INDEX)
     boundary = Position(start.offset + cut, start.line, start.col + cut)
     yield VirtualNode(value=value[:cut], start=start, end=boundary,
                       y=tuple(current_class_seq[:cut]), path=path)
     yield VirtualNode(value=value[cut:], start=boundary, end=end,
                       y=tuple(current_class_seq[cut:]), path=path)
Ejemplo n.º 7
0
    def _parse_file(self, contents: str, root: bblfsh.Node, path: str) -> \
            Tuple[List[VirtualNode], Dict[int, bblfsh.Node]]:
        """
        Parse a file into a sequence of `VirtualNode`-s and a mapping from node id to parent.

        Given the source text and the corresponding UAST this function compiles the list of
        `VirtualNode`-s and the parents mapping. That list of nodes is intended to equal the
        original source text exactly after `"".join(n.value for n in nodes)`. `parents` maps
        `id(node)` of each visited UAST node to its parent `bblfsh.Node`.

        An empty `contents` is treated as a single empty line.

        :param contents: source file text
        :param root: UAST root node
        :param path: path to the file, used for debugging
        :return: list of `VirtualNode`-s and the parents.
        :raises AssertionError: if the text between two UAST tokens is not fully covered by \
                                the matches of the tokens parser.
        """
        # build the line mapping
        lines = contents.splitlines(keepends=True)
        # Check if there is a newline in the end of file. Yes, you can just check
        # lines[-1][-1] == "\n" but if someone decide to use weird '\u2028' unicode character for
        # new line this condition gives wrong result.
        # `lines` is empty for empty contents, so guard the lookup of the last line.
        eof_new_line = bool(lines) and lines[-1].splitlines()[0] != lines[-1]
        if eof_new_line or not lines:
            # We add last line as empty one because it actually exists, but .splitlines() does not
            # return it.
            lines.append("")
        # line_offsets[i] is the offset of the first character of the i-th (0-based) line.
        # The extra last element is a sentinel which is greater than any valid offset.
        line_offsets = numpy.zeros(len(lines) + 1, dtype=numpy.int32)
        pos = 0
        for i, line in enumerate(lines):
            line_offsets[i] = pos
            pos += len(line)
        line_offsets[-1] = pos + 1

        # walk the tree: collect nodes with assigned tokens and build the parents map
        node_tokens = []
        parents = {}
        queue = [root]
        while queue:
            node = queue.pop()
            if node.internal_type in self.node_fixtures:
                node = self.node_fixtures[node.internal_type](node)
            for child in node.children:
                parents[id(child)] = node
            queue.extend(node.children)
            # A node is kept if it has a token, or if it is a leaf with a non-empty span.
            if node.token or (node.start_position and node.end_position
                              and node.start_position != node.end_position
                              and not node.children):
                node_tokens.append(node)
        node_tokens.sort(key=lambda n: n.start_position.offset)
        # The sentinel marks the end of the file so that the trailing gap is processed too.
        sentinel = bblfsh.Node()
        sentinel.start_position.offset = len(contents)
        sentinel.start_position.line = len(lines)
        node_tokens.append(sentinel)

        # scan `node_tokens` and fill the gaps with imaginary nodes
        result = []
        pos = 0
        parser = self.tokens.PARSER
        searchsorted = numpy.searchsorted
        for node in node_tokens:
            if node.start_position.offset < pos:
                # the node starts before the end of the previously emitted one, skip it
                continue
            if node.start_position.offset > pos:
                # there is text which is not covered by UAST tokens, split it with the parser
                sumlen = 0
                diff = contents[pos:node.start_position.offset]
                for match in parser.finditer(diff):
                    positions = []
                    for suboff in (match.start(), match.end()):
                        offset = pos + suboff
                        # 1-based line number: count of the line starts which are <= offset
                        line = searchsorted(line_offsets, offset, side="right")
                        col = offset - line_offsets[line - 1] + 1
                        positions.append(Position(offset, line, col))
                    token = match.group()
                    sumlen += len(token)
                    result.append(VirtualNode(token, *positions, path=path))
                assert sumlen == node.start_position.offset - pos, \
                    "missed some imaginary tokens: \"%s\"" % diff
            if node is sentinel:
                break
            result.extend(VirtualNode.from_node(node, contents, path, self.token_unwrappers))
            pos = node.end_position.offset
        return result, parents
Ejemplo n.º 8
0
    def _classify_vnodes(self, nodes: Iterable[VirtualNode], path: str) -> Iterable[VirtualNode]:
        """
        Fill "y" attribute in the VirtualNode-s extracted from _parse_file().

        It is the index of the corresponding class to predict. We detect indentation changes so
        several whitespace nodes are merged together.

        This is a generator. Nodes bound to a UAST node and non-whitespace nodes are yielded as
        they are (single and double quote characters get a label first). Whitespace nodes are
        replaced with newly created nodes: one per space/tab for the single-line whitespace,
        one per line break, and the nodes which describe how the indentation after the last
        line break differs from the indentation tracked so far.

        :param nodes: sequence of VirtualNodes.
        :param path: path to file.
        :return: generator of VirtualNodes, their number can differ from the number of \
                 the original ones.
        """
        # Characters of the indentation tracked so far; it persists between the nodes and is
        # extended or truncated at the bottom of the loop body.
        indentation = []
        for node in nodes:
            if node.node is not None:
                # the node is bound to a UAST node, pass it through
                yield node
                continue
            if not node.value.isspace():
                # not a pure whitespace node: only the quote characters are labeled
                if node.value == "'":
                    node.y = (CLASS_INDEX[CLS_SINGLE_QUOTE],)
                elif node.value == '"':
                    node.y = (CLASS_INDEX[CLS_DOUBLE_QUOTE],)
                yield node
                continue
            lines = node.value.splitlines(keepends=True)
            if lines[-1].splitlines()[0] != lines[-1]:
                # We add last line as empty one to mimic .split("\n") behaviour
                lines.append("")
            if len(lines) == 1:
                # only tabs and spaces are possible
                # every character becomes a separate node labeled with CLS_TAB or CLS_SPACE
                for i, char in enumerate(node.value):
                    if char == "\t":
                        cls = (CLASS_INDEX[CLS_TAB],)
                    else:
                        cls = (CLASS_INDEX[CLS_SPACE],)
                    offset, lineno, col = node.start
                    yield VirtualNode(
                        char,
                        Position(offset + i, lineno, col + i),
                        Position(offset + i + 1, lineno, col + i + 1),
                        y=cls, path=path)
                continue
            # Offset of the current line relative to the node start.
            line_offset = 0
            for i, line in enumerate(lines[:-1]):
                # `line` contains trailing whitespaces, we add it to the newline node
                start_offset = node.start.offset + line_offset
                # only the first line starts in the column of the node, the others start at 1
                start_col = node.start.col if i == 0 else 1
                lineno = node.start.line + i
                # the newline node ends at the first column of the next line
                yield VirtualNode(
                    line,
                    Position(start_offset, lineno, start_col),
                    Position(start_offset + len(line), lineno + 1, 1),
                    y=(CLASS_INDEX[CLS_NEWLINE],), path=path)
                line_offset += len(line)
            # `line` is the whitespace after the last line break, i.e. the new indentation.
            line = lines[-1].splitlines()[0] if lines[-1] else ""
            my_indent = list(line)
            # Move from the node end back to the position where the new indentation starts.
            offset, lineno, col = node.end
            offset -= len(line)
            col -= len(line)
            try:
                # Remove the tracked indentation characters from the new indentation.
                # list.remove() raises ValueError as soon as a character is missing; after
                # a successful pass `my_indent` holds only the extra characters.
                for ws in indentation:
                    my_indent.remove(ws)
            except ValueError:
                if my_indent:
                    # mixed tabs and spaces, do not classify
                    yield VirtualNode(
                        line,
                        Position(offset, lineno, col),
                        node.end, path=path)
                    continue
                # indentation decreases
                # one empty node per dropped character, labeled CLS_TAB_DEC or CLS_SPACE_DEC
                for char in indentation[len(line):]:
                    if char == "\t":
                        cls = (CLASS_INDEX[CLS_TAB_DEC],)
                    else:
                        cls = (CLASS_INDEX[CLS_SPACE_DEC],)
                    yield VirtualNode(
                        "",
                        Position(offset, lineno, col),
                        Position(offset, lineno, col),
                        y=cls, path=path)
                indentation = indentation[:len(line)]
                if indentation:
                    # the remaining indentation is emitted as one unlabeled node
                    yield VirtualNode(
                        "".join(indentation),
                        Position(offset, lineno, col),
                        node.end, is_accumulated_indentation=True, path=path)
            else:
                # indentation is stable or increases
                # one node per extra character, labeled CLS_TAB_INC or CLS_SPACE_INC
                for i, char in enumerate(my_indent):
                    if char == "\t":
                        cls = (CLASS_INDEX[CLS_TAB_INC],)
                    else:
                        cls = (CLASS_INDEX[CLS_SPACE_INC],)
                    yield VirtualNode(
                        char,
                        Position(offset + i, lineno, col + i),
                        Position(offset + i + 1, lineno, col + i + 1),
                        y=cls, path=path)
                offset += len(my_indent)
                col += len(my_indent)
                if indentation:
                    # the previously tracked indentation follows as one unlabeled node
                    yield VirtualNode(
                        "".join(indentation),
                        Position(offset, lineno, col),
                        Position(offset + len(indentation), lineno, col + len(indentation)),
                        is_accumulated_indentation=True, path=path)
                # remember the extra characters for the next whitespace nodes
                for char in my_indent:
                    indentation.append(char)
    def test_template(self):
        class FakeRules:
            rules = {34: "<rule # 34>"}

        class FakeModel:
            def __getitem__(self, item):
                return FakeRules()

        class FakeHeadFile:
            content = b"<first code line>\n<second code line>\n<third code line>"

        def fake_partitial(func, *_, **__):
            if func == descriptions.describe_rule:
                def fake_describe_rule(rule, *_, **__):
                    return rule
                return fake_describe_rule

            def fake_get_change_description(*_, **__):
                return "<change description>"
            return fake_get_change_description
        comment_template_flie = os.path.join(os.path.dirname(__file__), "..", "templates",
                                             "comment.jinja2")
        config = {
            "report_code_lines": True,
            "report_triggered_rules": True,
            "comment_template": comment_template_flie,
        }
        analyzer = FormatAnalyzer(config=config, model=FakeModel(), url="http://github.com/x/y")
        language = "<language>"
        line_number = 2
        suggested_code = "<new code line>"
        partial_backup = functools.partial
        vnode = VirtualNode(start=Position(10, 2, 1), end=Position(12, 3, 1), value="!",
                            y=(1,))
        vnode.applied_rule = FakeRules.rules[34]
        line_fix = LineFix(
            line_number=line_number, suggested_code=suggested_code,
            fixed_vnodes=[vnode], confidence=100)
        file_fix = FileFix(error="", line_fixes=[line_fix], language=language, base_file=None,
                           feature_extractor=None, file_vnodes=[], head_file=FakeHeadFile,
                           y=None, y_pred_pure=None)

        try:
            functools.partial = fake_partitial
            text = analyzer.render_comment_text(file_fix, 0)
            res = """format: style mismatch:
```<language>
1|<first code line>
2|<second code line>
3|<third code line>
```
```suggestion
<new code line>
```

<change description>
Triggered rule
```
<rule # 34>
```
"""
            self.assertEqual(text, res)
        finally:
            functools.partial = partial_backup
Ejemplo n.º 10
0
    def test_apply_new_indentation(self):
        """
        Check `CodeGenerator.apply_new_indentation` results, errors and the logged warning.

        Each case is (node value, expectation, class names for `y_old`, class names for `y`,
        last indentation). A tuple expectation is the value the call must return; anything
        else is the exception type the call must raise. The final part checks the beginning
        of the warning message logged through `CodeGenerator._log.warning`.
        """
        cases = [
            ("\n    ", ("\n", "  "), (cls.CLS_NEWLINE, cls.CLS_SPACE_INC,
                                      cls.CLS_SPACE_INC), (cls.CLS_NEWLINE, ),
             ""),
            ("\n    ", ("\n", "      "), (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC,
                                          cls.CLS_SPACE_DEC),
             (cls.CLS_NEWLINE, ), ""),
            ("\n\t ", ("\n", ""), (cls.CLS_NEWLINE, cls.CLS_TAB_INC,
                                   cls.CLS_SPACE_INC), (cls.CLS_NEWLINE, ),
             ""),
            ("\n    ", InapplicableIndentation,
             (cls.CLS_NEWLINE, cls.CLS_TAB_INC,
              cls.CLS_TAB_INC), (cls.CLS_NEWLINE, ), ""),
            ("\n   ", ValueError, (cls.CLS_NEWLINE, cls.CLS_SPACE,
                                   cls.CLS_SPACE_INC, cls.CLS_SPACE_INC),
             (cls.CLS_NEWLINE, ), ""),
            ("\n\t  ", InapplicableIndentation,
             (cls.CLS_NEWLINE, cls.CLS_SPACE_INC,
              cls.CLS_SPACE_INC), (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC), ""),
            ("\n\t   ", ValueError, (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC),
             (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC, cls.CLS_SPACE,
              cls.CLS_SPACE_DEC), ""),
            ("\n\n    ", ("\n", "  "), (cls.CLS_NEWLINE, cls.CLS_NEWLINE,
                                        cls.CLS_SPACE_DEC),
             (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC, cls.CLS_SPACE_DEC,
              cls.CLS_SPACE_DEC), ""),
            ("", ("\n", "  "), (cls.CLS_NOOP, ), (cls.CLS_NEWLINE, ), "  "),
            ("", ("\n\n", ""), (cls.CLS_NOOP, ), (cls.CLS_NEWLINE,
                                                  cls.CLS_NEWLINE), ""),
        ]
        for value, result, y_old, y, last_ident in cases:
            vnode = VirtualNode(value,
                                Position(0, 1, 1),
                                Position(len(y), 1,
                                         len(y) + 1),
                                y=tuple(cls.CLASS_INDEX[i] for i in y))
            vnode.y_old = tuple(cls.CLASS_INDEX[i] for i in y_old)
            if isinstance(result, tuple):
                self.assertEqual(
                    CodeGenerator.apply_new_indentation(vnode, last_ident),
                    result)
            else:
                with self.assertRaises(result):
                    CodeGenerator.apply_new_indentation(vnode, last_ident)

        msg = None

        def _warning(*args):
            # capture the first positional argument of the logging call
            nonlocal msg
            msg = args[0]

        # Save the original method before entering `try`: if the lookup fails, the `finally`
        # clause must not reference an unbound name and hide the real error.
        backup_warning = CodeGenerator._log.warning
        try:
            CodeGenerator._log.warning = _warning
            vnode = VirtualNode(
                "\n ",
                Position(0, 1, 1),
                Position(3, 1, 4),
                y=tuple(cls.CLASS_INDEX[i]
                        for i in (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC,
                                  cls.CLS_SPACE_DEC, cls.CLS_SPACE_DEC)))
            vnode.y_old = tuple(cls.CLASS_INDEX[i]
                                for i in (cls.CLS_NEWLINE, cls.CLS_SPACE_DEC))
            CodeGenerator.apply_new_indentation(vnode, "")
            expected_msg = "There is no indentation characters left to decrease for vnode"
            self.assertEqual(msg[:len(expected_msg)], expected_msg)
        finally:
            CodeGenerator._log.warning = backup_warning