Example #1
    def generate_typos_fixes(self,
                             changes: Sequence[Change]) -> Iterator[TypoFix]:
        """
        Generate all the data about typo fixes required for any type of further processing.

        The processing can be comment generation or performance report generation.

        :param changes: The sequence of changes in the pointed state.
        :return: Iterator with unrendered data per comment.
        """
        base_files_by_lang = files_by_language(c.base for c in changes)
        head_files_by_lang = files_by_language(c.head for c in changes)
        for lang, head_files in head_files_by_lang.items():
            for file in filter_files(
                    files=head_files,
                    line_length_limit=self.config["line_length_limit"],
                    overall_size_limit=self.config["overall_size_limit"],
                    log=self._log):
                try:
                    prev_file = base_files_by_lang[lang][file.path]
                except KeyError:
                    lines = []
                    old_identifiers = set()
                else:
                    lines = find_new_lines(prev_file, file)
                    old_identifiers = {
                        node.token
                        for node in uast2sequence(prev_file.uast)
                        if bblfsh.role_id("IDENTIFIER") in node.roles
                        and bblfsh.role_id("IMPORT") not in node.roles
                        and node.token
                    }
                changed_nodes = extract_changed_nodes(file.uast, lines)
                new_identifiers = [
                    node for node in changed_nodes
                    if bblfsh.role_id("IDENTIFIER") in node.roles
                    and bblfsh.role_id("IMPORT") not in node.roles
                    and node.token and node.token not in old_identifiers
                ]
                if not new_identifiers:
                    continue
                suggestions = self.check_identifiers(
                    [n.token for n in new_identifiers])
                for index, corrections in suggestions.items():
                    for token in corrections:
                        yield TypoFix(
                            head_file=file,
                            token=new_identifiers[index].token,
                            candidates=[
                                Candidate(*c[:2]) for c in corrections[token]
                            ],
                            line_number=new_identifiers[index].start_position.line,
                        )
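
The generator above yields one TypoFix per suspicious identifier. Below is a minimal sketch of how that stream could be rendered into messages; it assumes (this is not confirmed by the source) that each Candidate carries a (token, confidence) pair, as the Candidate(*c[:2]) call suggests, and the helper itself is hypothetical:

def render_typo_fixes(typo_fixes):
    # Hypothetical helper, not part of the original class: format one
    # human-readable line per TypoFix yielded by generate_typos_fixes().
    for fix in typo_fixes:
        # Assumption: each Candidate exposes .token and .confidence.
        suggestions = ", ".join(
            "%s (%d%%)" % (c.token, int(c.confidence * 100))
            for c in fix.candidates)
        yield "%s:%d: possible typo in \"%s\": %s" % (
            fix.head_file.path, fix.line_number, fix.token, suggestions)
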
Example #2
    def __call__(self, uast):
        bag = defaultdict(int)
        node_sequence = uast2sequence(uast)

        # Convert nodes to string indices: wmhash.BagsExtractor requires str keys.
        node_sequence = [self.node2index[n] for n in node_sequence]

        # Slide a window of each configured length over the node sequence
        # with the configured stride and count the joined n-grams.
        for seq_len in self._seq_lens:
            for i in range(0, len(node_sequence) - seq_len + 1, self._stride):
                key = self.SEP.join(node_sequence[i:i + seq_len])
                bag[key] += 1
        return bag
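
The extractor slides a window of every length in self._seq_lens over the node sequence with a fixed stride and counts each joined n-gram. The same counting logic as a self-contained sketch on a plain list of strings (the function name and its defaults are illustrative, not part of the original class):

from collections import defaultdict


def bag_of_ngrams(sequence, seq_lens=(2,), stride=1, sep=">"):
    # Count n-grams of each requested length, advancing by `stride`.
    bag = defaultdict(int)
    for seq_len in seq_lens:
        for i in range(0, len(sequence) - seq_len + 1, stride):
            bag[sep.join(sequence[i:i + seq_len])] += 1
    return bag


# bag_of_ngrams(["If", "Call", "Name", "Call"])
# counts {"If>Call": 1, "Call>Name": 1, "Name>Call": 1}
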
Example #3
    def extract_functions_from_uast(self, uast: bblfsh.Node):
        for node in uast2sequence(uast):
            if node.internal_type != "MethodDeclaration":
                continue
            # Find the child node that carries the function name.
            name = None
            for subnode in node.children:
                if FUNCTION not in subnode.roles and NAME not in subnode.roles:
                    continue
                name = subnode.token
                break
            if name is None:
                # No named child found: skip the declaration instead of
                # reusing a stale name from a previous iteration.
                continue
            tokens = list(self.process_uast(node))
            if len(tokens) < 5:
                continue
            yield (name, node.start_position.line, node.end_position.line,
                   [token for token, pos in sorted(tokens, key=lambda x: x[1])])
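
A short, hypothetical driver for the generator above (extractor and uast are assumed to exist in the caller's scope):

# Print one summary line per extracted function.
for name, start, end, tokens in extractor.extract_functions_from_uast(uast):
    print("%s (lines %d-%d): %d tokens" % (name, start, end, len(tokens)))
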
Example #4
    def __call__(self, uast):
        """
        HOTFIX for https://github.com/bblfsh/client-python/issues/92
        Converts a UAST to a weighed bag-of-literals. The weights are literals frequencies.
        The tokens are preprocessed by _token_parser.
        Overwrite __call__ to avoid issues with `bblfsh.filter`.

        :param uast: The UAST root node.
        :return: bag
        """
        nodes = [node for node in uast2sequence(uast) if LITERAL in node.roles]
        bag = defaultdict(int)
        for node in nodes:
            for sub in self._token_parser.process_token(node.token):
                try:
                    bag[self._token2index[sub]] += 1
                except KeyError:
                    continue
        return bag
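
The try/except in the loop above silently drops sub-tokens that are missing from the vocabulary. The same out-of-vocabulary filter as a standalone sketch (the function name and arguments are illustrative):

from collections import defaultdict


def count_known_tokens(subtokens, token2index):
    # Count only sub-tokens present in the vocabulary; unknown ones
    # are skipped, mirroring the KeyError handling above.
    bag = defaultdict(int)
    for sub in subtokens:
        index = token2index.get(sub)
        if index is not None:
            bag[index] += 1
    return bag


# count_known_tokens(["foo", "bar", "foo"], {"foo": 0}) counts {0: 2}
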
    def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                data_service: DataService, **data) -> List[Comment]:
        """
        Return the list of `Comment`-s with the found typo corrections.

        :param ptr_from: The Git revision of the fork point. Exists in both the original and \
                         the forked repositories.
        :param ptr_to: The Git revision to analyze. Exists only in the forked repository.
        :param data_service: The channel to the data service in Lookout server to query for \
                             UASTs, file contents, etc.
        :param data: Extra data passed into the method. Used by the decorators to simplify \
                     the data retrieval.
        :return: List of found review suggestions. Refer to \
                 lookout/core/server/sdk/service_analyzer.proto.
        """
        log = self.log
        comments = []
        changes = list(data["changes"])
        base_files_by_lang = files_by_language(c.base for c in changes)
        head_files_by_lang = files_by_language(c.head for c in changes)
        line_length = self.config.get("line_length_limit",
                                      self.DEFAULT_LINE_LENGTH_LIMIT)
        for lang, head_files in head_files_by_lang.items():
            for file in filter_files(head_files, line_length, log):
                try:
                    prev_file = base_files_by_lang[lang][file.path]
                except KeyError:
                    lines = []
                    old_identifiers = set()
                else:
                    lines = find_new_lines(prev_file, file)
                    old_identifiers = {
                        node.token
                        for node in uast2sequence(prev_file.uast)
                        if bblfsh.role_id("IDENTIFIER") in node.roles
                        and bblfsh.role_id("IMPORT") not in node.roles
                        and node.token
                    }
                changed_nodes = extract_changed_nodes(file.uast, lines)
                new_identifiers = [
                    node for node in changed_nodes
                    if bblfsh.role_id("IDENTIFIER") in node.roles
                    and bblfsh.role_id("IMPORT") not in node.roles
                    and node.token and node.token not in old_identifiers
                ]
                if not new_identifiers:
                    continue
                suggestions = self.check_identifiers(
                    [n.token for n in new_identifiers])
                for index, corrections in suggestions.items():
                    for token in corrections:
                        comment = Comment()
                        comment.file = file.path
                        corrections_line = " " + ", ".join(
                            "%s (%d%%)" % (candidate[0], int(candidate[1] * 100))
                            for candidate in corrections[token])
                        comment.text = ("Possible typo in \"%s\". Suggestions:"
                                        % new_identifiers[index].token
                                        + corrections_line)
                        comment.line = new_identifiers[index].start_position.line
                        comment.confidence = int(corrections[token][0][1] * 100)
                        comments.append(comment)
        return comments
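
For reference, a standalone reproduction of the comment text assembled in the loop above (the token and the candidate list are made up for illustration):

token = "recieve"
corrections = [("receive", 0.93), ("relieve", 0.04)]
corrections_line = " " + ", ".join(
    "%s (%d%%)" % (candidate[0], int(candidate[1] * 100))
    for candidate in corrections)
text = "Possible typo in \"%s\". Suggestions:" % token + corrections_line
# text == 'Possible typo in "recieve". Suggestions: receive (93%), relieve (4%)'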