Exemple #1
0
 def setUp(self):
     """Prepare the JavaScript feature extractor and the annotated file.

     Sets ``self.final_config`` to the JavaScript training section of the
     loaded analyzer config, ``self.extractor`` to a ``FeatureExtractor``
     built from that section's ``"feature_extractor"`` options, and
     ``self.annotated_file`` to the annotations loaded from ``self.file``.
     """
     config = FormatAnalyzer._load_config(get_config())
     self.final_config = config["train"]["javascript"]
     self.extractor = FeatureExtractor(
         language="javascript", **self.final_config["feature_extractor"])
     # Load the annotations exactly once; a second load would only replace
     # the first result with an equivalent object.
     self.annotated_file = AnnotationManager.from_file(self.file)
Exemple #2
0
 def setUpClass(cls):
     """Extract features from the benchmark file once for the whole class.

     The default config is merged with a reduced JavaScript feature
     extractor setup (window sizes and parents depth of 1, three node
     features). The benchmark source and UAST are read from the xz
     archives next to this module, and the same file is passed to the
     extractor twice. The extractor and a few values derived from it are
     stored on the class.
     """
     default_config = get_config()
     extractor_overrides = {
         "left_siblings_window": 1,
         "right_siblings_window": 1,
         "parents_depth": 1,
         "node_features": ["start_line", "reserved", "roles"],
     }
     overrides = {
         "train": {"javascript": {"feature_extractor": extractor_overrides}},
     }
     merged = merge_dicts(default_config, overrides)
     config = FormatAnalyzer._load_config(merged)["train"]

     data_dir = Path(__file__).parent
     with lzma.open(str(data_dir / "benchmark.js.xz"), mode="rt") as source_in:
         contents = source_in.read()
     with lzma.open(str(data_dir / "benchmark.uast.xz")) as uast_in:
         uast = bblfsh.Node.FromString(uast_in.read())
     benchmark = File(content=contents.encode("utf-8"), uast=uast)

     extractor_options = config["javascript"]["feature_extractor"]
     cls.fe = FeatureExtractor(language="javascript", **extractor_options)
     cls.fe.extract_features([benchmark, benchmark])
     cls.class_representations = cls.fe.composite_class_representations
     cls.n_classes = len(cls.fe.labels_to_class_sequences)
     # One sample feature of each kind, looked up by its FeatureId.
     cls.ordinal = cls.return_node_feature(FeatureId.start_line)
     cls.categorical = cls.return_node_feature(FeatureId.reserved)
     cls.bag = cls.return_node_feature(FeatureId.roles)
Exemple #3
0
    def generate_local_test(mcs, case_name, uast, contents):
        """Build a test method for the case registered under ``case_name``.

        A fresh feature extractor is run on ``contents``/``uast`` to obtain
        the labeled virtual nodes used to translate the case's offsets into
        label indices. The returned function overrides the corresponding
        labels in a copy of ``self.y``, regenerates the code and compares it
        with the expected result of the case.

        :param mcs: First positional argument; not used in the body.
        :param case_name: Key into the module-level ``cases`` mapping.
        :param uast: UAST passed to ``UnicodeFile``.
        :param contents: File contents passed to ``UnicodeFile``.
        :return: The generated ``_test(self)`` function.
        """
        js_config = FormatAnalyzer._load_config(
            get_config())["train"]["javascript"]
        local_extractor = FeatureExtractor(
            language="javascript",
            label_composites=label_composites,
            **js_config["feature_extractor"])
        source = UnicodeFile(content=contents, uast=uast, path="", language="")
        _, _, (vnodes_y, _, _, _) = local_extractor.extract_features([source])
        offsets, y_pred, result = cases[case_name]

        def _test(self):
            labels = deepcopy(self.y)
            for offset, label in zip(offsets, y_pred):
                # Find the labeled node that starts at the offset. When no
                # node matches, the index stays at the last enumerated one.
                index = None
                for index, vnode in enumerate(vnodes_y):  # noqa: B007
                    if offset == vnode.start.offset:
                        break
                labels[index] = label
            generator = CodeGenerator(self.feature_extractor)
            all_indices = list(range(len(self.vnodes_y)))
            pred_vnodes = generator.apply_predicted_y(
                self.vnodes, self.vnodes_y, all_indices, FakeRules(labels))
            generated_file = generator.generate(pred_vnodes)
            self.assertEqual(generated_file, result)

        return _test
Exemple #4
0
 def test_quality_report_noisy(self):
     """Check that the noisy quality report prints positive final metrics.

     The report is run with captured output. The test is skipped when the
     run exits with ``SystemExit``. The captured lines are scanned for the
     "prediction rate x" and "precision y" float lists, and the last value
     of each must be greater than zero.
     """
     slogging.setup("DEBUG", False)
     with Capturing() as output, tempfile.TemporaryDirectory(
     ) as dir_output:
         try:
             quality_report_noisy(bblfsh=self.bblfsh,
                                  language=self.language,
                                  confidence_threshold=0.8,
                                  support_threshold=20,
                                  precision_threshold=0.95,
                                  dir_output=dir_output,
                                  config=get_config(),
                                  repos=REPOSITORIES)
         except SystemExit:
             self.skipTest("Matplotlib is required to run this test")
     # The decimal point is escaped so that only real float literals match;
     # an unescaped "." would accept any character between the digit runs.
     pattern = re.compile(
         r"((?:prediction rate x)|(?:precision y)): "
         r"\[(\d+\.\d+(?:, \d+\.\d+)+)\]"
     )
     metrics = {}
     for line in output:
         match = pattern.search(line)
         if match:
             metric, scores_string = match.group(1, 2)
             metrics[metric] = [
                 float(score) for score in scores_string.split(", ")
             ]
     # Fail with a readable message rather than a KeyError when a metric
     # line was never printed.
     self.assertIn("prediction rate x", metrics)
     self.assertIn("precision y", metrics)
     self.assertGreater(metrics["prediction rate x"][-1], 0)
     self.assertGreater(metrics["precision y"][-1], 0)
 def test_train_review_analyzer_integration(self):
     """Send a review event for the jQuery checkout through the analyzer."""
     manager = AnalyzerContextManager(analyzer=QualityReportAnalyzer,
                                      db=self.db.name,
                                      fs=self.fs.name)
     with manager as context:
         # The analyzer config is keyed by the analyzer name.
         analyzer_config = {QualityReportAnalyzer.name: get_config()}
         context.review(FROM_COMMIT,
                        TO_COMMIT,
                        git_dir=self.jquery_dir,
                        config_json=analyzer_config)
Exemple #6
0
 def setUpClass(cls):
     """Load the benchmark file and build the JavaScript feature extractor.

     Stores the single benchmark ``File`` in ``cls.files`` and the
     extractor, configured from the default training config, in
     ``cls.extractor``.
     """
     data_dir = Path(__file__).parent
     # Paths are converted with str() to stay compatible with Python 3.5.
     source_archive = str(data_dir / "benchmark.js.xz")
     uast_archive = str(data_dir / "benchmark.uast.xz")
     with lzma.open(source_archive, mode="rt") as source_in:
         contents = source_in.read()
     with lzma.open(uast_archive) as uast_in:
         uast = bblfsh.Node.FromString(uast_in.read())
     cls.files = [File(content=contents.encode("utf-8"), uast=uast)]
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_options = train_config["javascript"]["feature_extractor"]
     cls.extractor = FeatureExtractor(language="javascript",
                                      **extractor_options)
 def test_train_review_analyzer_integration(self):
     """Run a "review" request against the analyzer listening on self.port."""
     manager = AnalyzerContextManager(analyzer=QualityReportAnalyzer,
                                      port=self.port,
                                      db=self.db.name,
                                      fs=self.fs.name)
     with manager:
         # The config is serialized to JSON before being handed to server.run.
         serialized_config = json.dumps(
             {QualityReportAnalyzer.name: get_config()})
         server.run("review",
                    FROM_COMMIT,
                    TO_COMMIT,
                    port=self.port,
                    git_dir=self.jquery_dir,
                    config_json=serialized_config)
Exemple #8
0
 def setUpClass(cls):
     """Parse ``jquery.layout.js`` and extract its features once per class.

     Stores the extractor, the raw code bytes, the UAST returned by the
     Babelfish client at ``0.0.0.0:9432``, and the labels and virtual nodes
     produced by ``extract_features`` on the class.
     """
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_options = train_config["javascript"]["feature_extractor"]
     cls.extractor = FeatureExtractor(language="javascript",
                                      **extractor_options)
     cls.code = (Path(__file__).parent / "jquery.layout.js").read_bytes()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     response = client.parse(
         filename="", language="javascript", contents=cls.code)
     cls.uast = response.uast
     parsed_file = FakeFile(path="test.py",
                            content=cls.code,
                            uast=cls.uast,
                            language="JavaScript")
     # Only the labels and the two virtual node lists are kept.
     _, cls.y, (cls.vnodes_y, cls.vnodes, _, _) = \
         cls.extractor.extract_features([parsed_file])
 def setUpClass(cls):
     """Extract features from the small benchmark file once per class.

     Disables diff truncation, builds a JavaScript ``FeatureExtractor`` with
     ``label_composites``, and stores the file together with every part of
     the ``extract_features`` output on the class.
     """
     cls.maxDiff = None
     data_dir = Path(__file__).parent
     # Paths are converted with str() to stay compatible with Python 3.5.
     source_archive = str(data_dir / "benchmark_small.js.xz")
     uast_archive = str(data_dir / "benchmark_small.js.uast.xz")
     with lzma.open(source_archive, mode="rt") as source_in:
         contents = source_in.read()
     with lzma.open(uast_archive) as uast_in:
         uast = bblfsh.Node.FromString(uast_in.read())
     js_config = FormatAnalyzer._load_config(get_config())["train"]["javascript"]
     cls.feature_extractor = FeatureExtractor(
         language="javascript",
         label_composites=label_composites,
         **js_config["feature_extractor"])
     cls.file = File(content=contents.encode("utf-8"), uast=uast)
     extracted = cls.feature_extractor.extract_features([cls.file])
     cls.X, cls.y, (cls.vnodes_y, cls.vnodes, cls.vnode_parents, cls.node_parents) = \
         extracted
 def test_evaluate_smoke_entry_integration(self):
     """Generate a smoke dataset, trim its index and evaluate what is left.

     Checks the generated index length and directory listing, rewrites the
     index with every third line, runs ``evaluate_smoke_entry`` and checks
     the shape of the resulting ``report.csv``.
     """
     smoke_package_dir = os.path.dirname(generate_smoke.__file__)
     input_path = os.path.join(smoke_package_dir, "data",
                               "js_smoke_init.tar.xz")
     # Replace the module-level rule set with a single style rule.
     generate_smoke.js_format_rules = {"equal_no_space_style": (" = ", "=")}
     with tempfile.TemporaryDirectory(
             prefix="test-smoke-eval-") as outputpath:
         generate_smoke.generate_smoke_entry(input_path,
                                             outputpath,
                                             force=True)
         index_path = os.path.join(outputpath, "index.csv")
         with open(index_path) as index_in:
             index_lines = index_in.read().splitlines()
         self.assertEqual(len(index_lines), 5)
         produced_entries = set(os.listdir(outputpath))
         self.assertEqual(
             produced_entries,
             {"index.csv", "nodejs", "jsquery", "freeCodeCamp", "react"})
         # Keep every third index line for the evaluation step.
         with open(index_path, "w") as index_out:
             index_out.write("\n".join(index_lines[::3]))
         report_dir = os.path.join(outputpath, "report")
         evaluate_smoke_entry(outputpath, report_dir, None, "0.0.0.0:9432",
                              get_config())
         report_path = os.path.join(report_dir, "report.csv")
         report = pandas.read_csv(report_path)
         self.assertEqual(len(report), 4)
         self.assertEqual(len(report.columns), 10)
Exemple #11
0
 def setUp(self):
     """Create a JavaScript feature extractor from the default train config."""
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_options = train_config["javascript"]["feature_extractor"]
     self.extractor = FeatureExtractor(language="javascript",
                                       **extractor_options)
Exemple #12
0
    def test_generate_new_line(self):
        """Check the lines produced by ``CodeGenerator.generate_new_line`` per case.

        For every case name in ``expected_res`` the label overrides registered
        in the module-level ``cases`` mapping are applied to a copy of the
        extracted labels. The nodes are then grouped by line with
        ``FormatAnalyzer._group_line_nodes`` and the regenerated lines are
        compared with the expected list. Cases whose expected value is
        ``None`` are executed but not asserted.
        """
        self.maxDiff = None
        # Case name -> expected regenerated lines; None disables the check.
        expected_res = {
            "nothing changed": [],
            "remove new line in the end of 4th line":
            None,
            "indentation in the beginning":
            [" import { makeToast } from '../../common/app/Toasts/redux';"],
            "remove indentation in the 4th line till the end":
            [" return Object.keys(flash)", " }"],
            "new line between 6th and 7th regular code lines":
            ["\n      return messages.map(message => ({"],
            "new line in the middle of the 7th code line with indentation increase":
            ["      return messages\n        .map(message => ({", "  })"],
            "new line in the middle of the 7th code line with indentation decrease":
            ["      return messages\n    .map(message => ({", "      })"],
            "new line in the middle of the 7th code line without indentation increase":
            ["      return messages\n      .map(message => ({"],
            "change quotes":
            ['import { makeToast } from "../../common/app/Toasts/redux";'],
            "remove indentation decrease 11th line": ["        }));"],
            "change indentation decrease to indentation increase 11th line":
            ["          }));"],
            "change indentation decrease to indentation increase 11th line but keep the rest":
            ["          }));", "})"],
        }

        base = Path(__file__).parent
        # str() is needed for Python 3.5
        with lzma.open(str(base / "benchmark_small.js.xz"), mode="rt") as fin:
            contents = fin.read()
        with lzma.open(str(base / "benchmark_small.js.uast.xz")) as fin:
            uast = bblfsh.Node.FromString(fin.read())
        config = FormatAnalyzer._load_config(get_config())
        fe_config = config["train"]["javascript"]

        for case in expected_res:
            # The expected full-file result stored in `cases` is not used here.
            offsets, y_pred, _ = cases[case]
            # A new extractor and file are built for every case.
            feature_extractor = FeatureExtractor(
                language="javascript",
                label_composites=label_composites,
                **fe_config["feature_extractor"])
            file = UnicodeFile(content=contents,
                               uast=uast,
                               path="",
                               language="")
            X, y, (vnodes_y, vnodes, vnode_parents, node_parents) = \
                feature_extractor.extract_features([file])
            y_cur = deepcopy(y)
            for offset, yi in zip(offsets, y_pred):
                # Find the labeled node that starts at `offset`. If none
                # matches, `i` is left at the last enumerated index.
                i = None
                for i, vnode in enumerate(vnodes_y):  # noqa: B007
                    if offset == vnode.start.offset:
                        break
                y_cur[i] = yi
            code_generator = CodeGenerator(feature_extractor)
            pred_vnodes = code_generator.apply_predicted_y(
                vnodes, vnodes_y, list(range(len(vnodes_y))), FakeRules(y_cur))
            res = []
            # The last argument supplies a constant 1 for every label.
            for gln in FormatAnalyzer._group_line_nodes(
                    y, y_cur, vnodes_y, pred_vnodes, [1] * len(y)):
                line, (line_y, line_y_pred, line_vnodes_y, line_vnodes,
                       line_rule_winners) = gln
                new_code_line = code_generator.generate_new_line(line_vnodes)
                res.append(new_code_line)
            if expected_res[case] is not None:
                # None marks a case that deletes lines. That is not handled
                # properly yet, so such cases are not asserted.
                self.assertEqual(res, expected_res[case], case)