def comments_iterator(logs):
    # TODO (zurk): Use stdout and remove ifs when the lookout issue is solved:
    # https://github.com/src-d/lookout/issues/601
    for log_line in logs.splitlines():
        log_entry = json.loads(log_line.decode())
        if log_entry["msg"] == "line comment":
            yield Comment(file=log_entry["file"], text=log_entry["text"],
                          line=log_entry["line"])
        if log_entry["msg"] == "file comment":
            yield Comment(file=log_entry["file"], text=log_entry["text"])
        if log_entry["msg"] == "global comment":
            yield Comment(text=log_entry["text"])
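# Hedged usage sketch (illustration only, not part of the analyzer): comments_iterator
# expects the logs as bytes with one JSON object per line and a "msg" field equal to
# "line comment", "file comment" or "global comment". The sample entries below are made up.
sample_logs = b"\n".join([
    b'{"msg": "line comment", "file": "a.py", "text": "rename x", "line": 3}',
    b'{"msg": "file comment", "file": "a.py", "text": "file is too long"}',
    b'{"msg": "global comment", "text": "looks good overall"}',
])
for sample_comment in comments_iterator(sample_logs):
    print(sample_comment)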
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
            data_request_stub: DataStub, **data) -> [Comment]:
    comments = []
    changes = list(data["changes"])
    base_files = files_by_language(c.base for c in changes)
    head_files = files_by_language(c.head for c in changes)
    for lang, lang_head_files in head_files.items():
        # Only Java files are supported by the model.
        if lang.lower() != "java":
            continue
        self.log.info("Working on %d java files", len(lang_head_files))
        for i, (path, file) in enumerate(lang_head_files.items(), start=1):
            try:
                prev_file = base_files[lang][path]
            except KeyError:
                lines = None
            else:
                lines = find_new_lines(prev_file, file)
            names_file, tokens_file, line_numbers = self._extract_features(file, lines)
            for prediction, target, score, line_number, type_hint in self.translate(
                    tokens_file, names_file, line_numbers):
                comment = Comment()
                comment.line = line_number
                comment.file = path
                comment.confidence = int(round(score * 100))
                if type_hint == FunctionNameAnalyzer.TranslationTypes.LESS_DETAILED:
                    comment.text = "Consider a more generic name: %s instead of %s" % (
                        prediction, target)
                else:
                    comment.text = "Consider a more specific name: %s instead of %s" % (
                        prediction, target)
                # Append once, after the text has been set, to avoid duplicating the comment.
                comments.append(comment)
            self.log.info("Processed %d files", i)
    return comments
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,  # noqa: D
            data_service: DataService, changes: Iterable[Change]) -> [Comment]:
    self._log.info("analyze %s %s", ptr_from.commit, ptr_to.commit)
    comments = []
    parser = TokenParser(stem_threshold=100, single_shot=True)
    # Temporarily extend the autocorrect vocabulary with the identifiers known to the model.
    words = autocorrect.word.KNOWN_WORDS.copy()
    try:
        for name in self.model.names:
            if len(name) >= 3:
                autocorrect.word.KNOWN_WORDS.add(name)
        for change in changes:
            suggestions = defaultdict(list)
            new_lines = set(find_new_lines(change.base, change.head))
            for node in bblfsh.filter(change.head.uast, "//*[@roleIdentifier]"):
                if node.start_position is not None and node.start_position.line in new_lines:
                    for part in parser.split(node.token):
                        if part not in self.model.names:
                            fixed = autocorrect.spell(part)
                            if fixed != part:
                                suggestions[node.start_position.line].append(
                                    (node.token, part, fixed))
            for line, fixes in suggestions.items():
                comment = Comment()
                comment.file = change.head.path
                comment.text = "\n".join("`%s`: %s > %s" % fix for fix in fixes)
                comment.line = line
                comment.confidence = 100
                comments.append(comment)
    finally:
        # Restore the original vocabulary even if the analysis fails.
        autocorrect.word.KNOWN_WORDS = words
    return comments
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,  # noqa: D
            data_service: DataService, changes: Iterable[Change]) -> [Comment]:
    self._log.info("analyze %s %s", ptr_from.commit, ptr_to.commit)
    comments = []
    for change in changes:
        comment = Comment()
        comment.file = change.head.path
        comment.text = "%s %d > %d" % (change.head.language,
                                       self.model.node_counts.get(change.base.path, 0),
                                       self.count_nodes(change.head.uast))
        comment.line = 0
        comment.confidence = 100
        comments.append(comment)
    return comments
def to_comment(self, correct_y: int) -> Comment:
    """
    Generate the comment that describes the fix for the correct node class.

    :param correct_y: the index of the correct node class.
    :return: Lookout Comment object.
    """
    comment = Comment()
    comment.line = self.start.line
    if correct_y == CLASS_INDEX[CLS_NOOP]:
        comment.text = "format: %s at column %d should be removed" % (
            CLASSES[self.y], self.start.col)
    elif self.y == CLASS_INDEX[CLS_NOOP]:
        comment.text = "format: %s should be inserted at column %d" % (
            CLASSES[correct_y], self.start.col)
    else:
        comment.text = "format: replace %s with %s at column %d" % (
            CLASSES[self.y], CLASSES[correct_y], self.start.col)
    return comment
def generate_comment(filename: str, confidence: int, line: int, text: str) -> Comment:
    """
    Generate a comment.

    :param filename: filename the comment refers to.
    :param confidence: confidence of the comment. Should be in the range [0, 100].
    :param line: 1-based line number for the comment. 0 means the comment is for the whole file.
    :param text: comment text.
    :return: generated comment.
    """
    assert 0 <= confidence <= 100, \
        "Confidence should be in range 0~100 but value is '%s'" % confidence
    assert isinstance(line, int), "Line should be an integer but its type is '%s'" % type(line)
    assert 0 <= line, "Expected value >= 0 but got '%s'" % line
    comment = Comment()
    comment.file = filename
    comment.confidence = confidence
    comment.line = line
    comment.text = text
    return comment
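# Minimal usage sketch for generate_comment; the file name, line and text below are
# illustrative values only.
example_comment = generate_comment(filename="src/main.py", confidence=85, line=12,
                                   text="Possible typo in the variable name.")
# Passing line=0 marks a comment that applies to the whole file rather than a single line.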
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
            data_service: DataService, **data) -> [Comment]:
    """
    Return the list of `Comment`-s with the found typo corrections.

    :param ptr_from: The Git revision of the fork point. Exists in both the original and \
                     the forked repositories.
    :param ptr_to: The Git revision to analyze. Exists only in the forked repository.
    :param data_service: The channel to the data service in Lookout server to query for \
                         UASTs, file contents, etc.
    :param data: Extra data passed into the method. Used by the decorators to simplify \
                 the data retrieval.
    :return: List of found review suggestions. Refer to \
             lookout/core/server/sdk/service_analyzer.proto.
    """
    log = self.log
    comments = []
    changes = list(data["changes"])
    base_files_by_lang = files_by_language(c.base for c in changes)
    head_files_by_lang = files_by_language(c.head for c in changes)
    line_length = self.config.get("line_length_limit", self.DEFAULT_LINE_LENGTH_LIMIT)
    for lang, head_files in head_files_by_lang.items():
        for file in filter_files(head_files, line_length, log):
            try:
                prev_file = base_files_by_lang[lang][file.path]
            except KeyError:
                lines = []
                old_identifiers = set()
            else:
                lines = find_new_lines(prev_file, file)
                old_identifiers = {
                    node.token for node in uast2sequence(prev_file.uast)
                    if bblfsh.role_id("IDENTIFIER") in node.roles
                    and bblfsh.role_id("IMPORT") not in node.roles and node.token}
            changed_nodes = extract_changed_nodes(file.uast, lines)
            new_identifiers = [
                node for node in changed_nodes
                if bblfsh.role_id("IDENTIFIER") in node.roles
                and bblfsh.role_id("IMPORT") not in node.roles
                and node.token and node.token not in old_identifiers]
            if not new_identifiers:
                continue
            suggestions = self.check_identifiers([n.token for n in new_identifiers])
            for index, corrections in suggestions.items():
                for token in corrections:
                    comment = Comment()
                    comment.file = file.path
                    corrections_line = " " + ", ".join(
                        "%s (%d%%)" % (candidate[0], int(candidate[1] * 100))
                        for candidate in corrections[token])
                    comment.text = "Possible typo in \"%s\". Suggestions:" % \
                        new_identifiers[index].token + corrections_line
                    comment.line = new_identifiers[index].start_position.line
                    comment.confidence = int(corrections[token][0][1] * 100)
                    comments.append(comment)
    return comments
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
            data_request_stub: DataStub, **data) -> [Comment]:
    comment = Comment()
    comment.text = "%s|%s" % (ptr_from.commit, ptr_to.commit)
    FakeAnalyzer.stub = data_request_stub
    return [comment]
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
            data_service: DataService, **data) -> [Comment]:
    comment = Comment()
    comment.text = "%s|%s" % (ptr_from.commit, ptr_to.commit)
    FakeAnalyzer.service = data_service
    return [comment]