Example #1
0
 def _generate_token_fixes(
         self, file: File, fe: FeatureExtractor, feature_extractor_output,
         bblfsh_stub: "bblfsh.aliases.ProtocolServiceStub", rules: Rules,
 ) -> Tuple[List[LineFix], List[VirtualNode], numpy.ndarray, numpy.ndarray]:
     """
     Predict new formatting labels for one file and convert them into per-line fixes.

     :param file: The file to analyze.
     :param fe: Feature extractor that produced ``feature_extractor_output``.
     :param feature_extractor_output: ``(X, y, (vnodes_y, vnodes, vnode_parents,
         node_parents))`` tuple as returned by the feature extractor.
     :param bblfsh_stub: Babelfish gRPC stub; used by the UAST stability check to
         re-parse the patched code.
     :param rules: Trained rules that produce the label predictions.
     :return: Tuple of (line fixes, new virtual nodes, pure predictions that survived
         the safety filter, ground truth labels).
     """
     X, y, (vnodes_y, vnodes, vnode_parents, node_parents) = feature_extractor_output
     y_pred_pure, rule_winners, new_rules, grouped_quote_predictions = rules.predict(
         X=X, vnodes_y=vnodes_y, vnodes=vnodes, feature_extractor=fe)
     y_pred = rules.fill_missing_predictions(y_pred_pure, y)
     # Optionally discard predictions that would break the file's UAST when applied.
     if self.analyze_config[file.language.lower()]["uast_break_check"]:
         checker = UASTStabilityChecker(fe)
         y, y_pred, vnodes_y, rule_winners, safe_preds = checker.check(
             y=y, y_pred=y_pred, vnodes_y=vnodes_y, vnodes=vnodes, files=[file],
             stub=bblfsh_stub, vnode_parents=vnode_parents, node_parents=node_parents,
             rule_winners=rule_winners, grouped_quote_predictions=grouped_quote_predictions)
         # Keep the pure predictions aligned with the arrays filtered by check() above.
         y_pred_pure = y_pred_pure[safe_preds]
     assert len(y) == len(y_pred)
     assert len(y) == len(rule_winners)
     code_generator = CodeGenerator(fe, skip_errors=True)
     new_vnodes = code_generator.apply_predicted_y(vnodes, vnodes_y, rule_winners, new_rules)
     token_fixes = []
     newline_index = CLASS_INDEX[CLS_NEWLINE]
     # Group predictions by source line and build one LineFix per affected line.
     for line_number, line in self._group_line_nodes(
             y, y_pred, vnodes_y, new_vnodes, rule_winners):
         line_ys, line_ys_pred, line_vnodes_y, new_line_vnodes, line_winners = line
         try:
             new_code_line = code_generator.generate_new_line(new_line_vnodes)
         except Exception:
             # Best effort: log the failure and emit the fix without a code suggestion.
             self._log.exception(
                 "Failed to generate new line suggestion for line %d in %s. line vnodes:\n%s",
                 line_number, file.path, "\n".join(
                     "%s, y_old=%s" % (repr(vn), getattr(vn, "y_old", "N/A"))
                     for vn in new_line_vnodes))
             new_code_line = None
         # The first vnode of a line carries the leading newline label; compare old vs.
         # new newline counts to detect line insertions/removals — TODO confirm this
         # invariant against _group_line_nodes().
         if (new_line_vnodes and hasattr(new_line_vnodes[0], "y_old") and newline_index in
                 new_line_vnodes[0].y_old):
             lines_num_diff = new_line_vnodes[0].y.count(newline_index) - \
                              new_line_vnodes[0].y_old.count(newline_index)
             if lines_num_diff < 0:
                 # Some lines were removed. This means that several original lines should be
                 # modified. GitHub Suggested Change feature cannot handle such cases right now.
                 # To not confuse the user we do not provide any code suggestion.
                 new_code_line = None
         confidence = self._get_comment_confidence(line_ys, line_ys_pred, line_winners,
                                                   new_rules)
         # VirtualNode-s whose predicted label differs from their original one.
         fixed_vnodes = [vnode for vnode in new_line_vnodes if
                         hasattr(vnode, "y_old") and vnode.y_old != vnode.y]
         token_fixes.append(LineFix(
             line_number=line_number,        # line number for the comment
             suggested_code=new_code_line,   # code line suggested by our model
             fixed_vnodes=fixed_vnodes,      # VirtualNode-s with changed y
             confidence=confidence,          # overall confidence in the prediction, 0-100
         ))
     return token_fixes, new_vnodes, y_pred_pure, y
Example #2
0
 def test_vnode_positions(self):
     """Regenerate every source line from its virtual nodes and compare with the original.

     Passes ``self.y - 1`` as the "predicted" labels so that every label counts as
     changed — presumably forcing ``generate_new_line`` to rebuild each line from
     scratch (TODO confirm against ``_group_line_nodes`` semantics). Any mismatch is
     reported on stderr and the test fails at the end.
     """
     code_generator = CodeGenerator(feature_extractor=self.extractor)
     lines = self.code.decode("utf-8", "replace").splitlines()
     # Line numbers from the grouping are 1-based and may point one past the last
     # line; append a sentinel so the lookup below never raises IndexError.
     lines.append("\r\n")
     ok = True
     for line_number, line in FormatAnalyzer._group_line_nodes(
             self.y, self.y - 1, self.vnodes_y, self.vnodes, repeat(0)):
         line_ys, line_ys_pred, line_vnodes_y, new_line_vnodes, line_winners = line
         new_code_line = code_generator.generate_new_line(new_line_vnodes)
         if lines[line_number - 1] != new_code_line:
             print("Lines %d are different" % line_number, file=sys.stderr)
             print(repr(lines[line_number - 1]), file=sys.stderr)
             print(repr(new_code_line), file=sys.stderr)
             # Keep the blank separator on stderr too — the bare print() used to
             # split the diagnostics between stdout and stderr.
             print(file=sys.stderr)
             ok = False
     self.assertTrue(ok, "Original and restored lines are different")
Example #3
0
    def test_generate_new_line(self):
        """Apply hand-crafted label changes and check the regenerated code lines.

        ``cases`` (module-level fixture) maps each case name to the vnode start offsets
        and the new labels to apply there; ``expected_res`` lists the code lines that
        are expected to come out different after regeneration, or ``None`` when the
        change deletes lines (not checked — see the note at the bottom).
        """
        self.maxDiff = None
        expected_res = {
            "nothing changed": [],
            "remove new line in the end of 4th line":
            None,
            "indentation in the beginning":
            [" import { makeToast } from '../../common/app/Toasts/redux';"],
            "remove indentation in the 4th line till the end":
            [" return Object.keys(flash)", " }"],
            "new line between 6th and 7th regular code lines":
            ["\n      return messages.map(message => ({"],
            "new line in the middle of the 7th code line with indentation increase":
            ["      return messages\n        .map(message => ({", "  })"],
            "new line in the middle of the 7th code line with indentation decrease":
            ["      return messages\n    .map(message => ({", "      })"],
            "new line in the middle of the 7th code line without indentation increase":
            ["      return messages\n      .map(message => ({"],
            "change quotes":
            ['import { makeToast } from "../../common/app/Toasts/redux";'],
            "remove indentation decrease 11th line": ["        }));"],
            "change indentation decrease to indentation increase 11th line":
            ["          }));"],
            "change indentation decrease to indentation increase 11th line but keep the rest":
            ["          }));", "})"],
        }

        base = Path(__file__).parent
        # str() is needed for Python 3.5
        with lzma.open(str(base / "benchmark_small.js.xz"), mode="rt") as fin:
            contents = fin.read()
        with lzma.open(str(base / "benchmark_small.js.uast.xz")) as fin:
            uast = bblfsh.Node.FromString(fin.read())
        config = FormatAnalyzer._load_config(get_config())
        fe_config = config["train"]["javascript"]

        for case in expected_res:
            offsets, y_pred, _ = cases[case]
            # A fresh extractor per case: extraction may keep internal state between
            # runs — TODO confirm whether it could be hoisted out of the loop.
            feature_extractor = FeatureExtractor(
                language="javascript",
                label_composites=label_composites,
                **fe_config["feature_extractor"])
            file = UnicodeFile(content=contents,
                               uast=uast,
                               path="",
                               language="")
            X, y, (vnodes_y, vnodes, vnode_parents, node_parents) = \
                feature_extractor.extract_features([file])
            y_cur = deepcopy(y)
            # Overwrite the labels of the vnodes addressed by their start offsets.
            for offset, yi in zip(offsets, y_pred):
                for i, vnode in enumerate(vnodes_y):  # noqa: B007
                    if offset == vnode.start.offset:
                        break
                else:
                    # Previously a missing offset silently overwrote the label at the
                    # last index (or crashed on an empty list); fail loudly instead.
                    self.fail("no vnode with start offset %d (case %r)" % (offset, case))
                y_cur[i] = yi
            code_generator = CodeGenerator(feature_extractor)
            pred_vnodes = code_generator.apply_predicted_y(
                vnodes, vnodes_y, list(range(len(vnodes_y))), FakeRules(y_cur))
            res = []
            for line_number, line_info in FormatAnalyzer._group_line_nodes(
                    y, y_cur, vnodes_y, pred_vnodes, [1] * len(y)):
                line_y, line_y_pred, line_vnodes_y, line_vnodes, line_rule_winners = line_info
                res.append(code_generator.generate_new_line(line_vnodes))
            if expected_res[case] is not None:
                # None means that some lines were deleted; we do not handle that case
                # properly yet, so there is nothing to compare against.
                self.assertEqual(res, expected_res[case], case)