def setUp(self):
    """Build the per-test feature extractor and annotated file fixtures.

    Loads the default analyzer config, keeps the javascript training section
    on ``self.final_config``, and annotates ``self.file`` (provided by the
    class-level fixture) once.
    """
    config = FormatAnalyzer._load_config(get_config())
    self.final_config = config["train"]["javascript"]
    self.extractor = FeatureExtractor(
        language="javascript", **self.final_config["feature_extractor"])
    # Fix: the original assigned self.annotated_file twice with the exact
    # same expression (once before and once after building the extractor);
    # the redundant duplicate assignment was removed.
    self.annotated_file = AnnotationManager.from_file(self.file)
def setUpClass(cls):
    """Extract features once from the benchmark file and cache class-level fixtures.

    Narrow windows (size 1) and a minimal node-feature set are forced on top of
    the default config so the extracted features stay small and predictable.
    """
    overrides = {
        "train": {
            "javascript": {
                "feature_extractor": {
                    "left_siblings_window": 1,
                    "right_siblings_window": 1,
                    "parents_depth": 1,
                    "node_features": ["start_line", "reserved", "roles"],
                },
            },
        },
    }
    train_config = FormatAnalyzer._load_config(
        merge_dicts(get_config(), overrides))["train"]
    fixtures_dir = Path(__file__).parent
    # str() keeps lzma.open happy on older Python versions.
    with lzma.open(str(fixtures_dir / "benchmark.js.xz"), mode="rt") as handle:
        js_source = handle.read()
    with lzma.open(str(fixtures_dir / "benchmark.uast.xz")) as handle:
        uast = bblfsh.Node.FromString(handle.read())
    sample = File(content=bytes(js_source, "utf-8"), uast=uast)
    cls.fe = FeatureExtractor(
        language="javascript",
        **train_config["javascript"]["feature_extractor"])
    # Two copies of the same file, exactly as the original fixture did.
    cls.fe.extract_features([sample, sample])
    cls.class_representations = cls.fe.composite_class_representations
    cls.n_classes = len(cls.fe.labels_to_class_sequences)
    # One cached fixture per feature kind exercised by the tests.
    cls.ordinal = cls.return_node_feature(FeatureId.start_line)
    cls.categorical = cls.return_node_feature(FeatureId.reserved)
    cls.bag = cls.return_node_feature(FeatureId.roles)
def generate_local_test(mcs, case_name, uast, contents):
    """Build a test method for one code-generation case.

    Extracts features from *contents*/*uast*, looks the case up in ``cases``,
    and returns a closure that patches the expected labels into a copy of
    ``self.y``, regenerates the file, and asserts it matches the expectation.
    """
    train_config = FormatAnalyzer._load_config(
        get_config())["train"]["javascript"]
    extractor = FeatureExtractor(
        language="javascript", label_composites=label_composites,
        **train_config["feature_extractor"])
    unicode_file = UnicodeFile(content=contents, uast=uast, path="",
                               language="")
    _, _, (vnodes_y, _, _, _) = extractor.extract_features([unicode_file])
    offsets, y_pred, result = cases[case_name]

    def _test(self):
        patched_y = deepcopy(self.y)
        for offset, label in zip(offsets, y_pred):
            # Linear scan for the vnode at this offset; if none matches,
            # the index deliberately falls through to the last vnode,
            # exactly as the original loop behaved.
            pos = None
            for pos, vnode in enumerate(vnodes_y):  # noqa: B007
                if offset == vnode.start.offset:
                    break
            patched_y[pos] = label
        generator = CodeGenerator(self.feature_extractor)
        predicted = generator.apply_predicted_y(
            self.vnodes, self.vnodes_y, list(range(len(self.vnodes_y))),
            FakeRules(patched_y))
        self.assertEqual(generator.generate(predicted), result)

    return _test
def test_quality_report_noisy(self):
    """Run the noisy-data quality report and check that the last plotted
    prediction-rate and precision values are positive.

    The report's console output is captured and the metric arrays are
    scraped back out of it with a regex.
    """
    slogging.setup("DEBUG", False)
    with Capturing() as output, tempfile.TemporaryDirectory(
            ) as dir_output:
        try:
            quality_report_noisy(bblfsh=self.bblfsh,
                                 language=self.language,
                                 confidence_threshold=0.8,
                                 support_threshold=20,
                                 precision_threshold=0.95,
                                 dir_output=dir_output,
                                 config=get_config(),
                                 repos=REPOSITORIES)
        except SystemExit:
            self.skipTest("Matplotlib is required to run this test")
    # Fix: the decimal points were previously written as unescaped "." and
    # matched any character; "\." now matches a literal dot only.
    pattern = re.compile(
        r"((?:prediction rate x)|(?:precision y)): "
        r"\[(\d+\.\d+(, \d+\.\d+)+)\]")
    metrics = {}
    for line in output:
        match = pattern.search(line)
        if match:
            metric, scores_string = list(match.groups())[:2]
            scores_string = scores_string.split(", ")
            scores = [float(f) for f in scores_string]
            metrics[metric] = scores
    self.assertGreater(metrics["prediction rate x"][-1], 0)
    self.assertGreater(metrics["precision y"][-1], 0)
def test_train_review_analyzer_integration(self):
    """Integration test for review event."""
    analyzer_config = {QualityReportAnalyzer.name: get_config()}
    manager = AnalyzerContextManager(analyzer=QualityReportAnalyzer,
                                     db=self.db.name, fs=self.fs.name)
    with manager as ctx:
        ctx.review(FROM_COMMIT, TO_COMMIT,
                   git_dir=self.jquery_dir,
                   config_json=analyzer_config)
def setUpClass(cls):
    """Load the benchmark fixture file and build the shared extractor."""
    fixtures_dir = Path(__file__).parent
    # str() is needed for Python 3.5
    with lzma.open(str(fixtures_dir / "benchmark.js.xz"), mode="rt") as handle:
        js_source = handle.read()
    with lzma.open(str(fixtures_dir / "benchmark.uast.xz")) as handle:
        uast = bblfsh.Node.FromString(handle.read())
    cls.files = [File(content=bytes(js_source, "utf-8"), uast=uast)]
    train_config = FormatAnalyzer._load_config(get_config())["train"]
    cls.extractor = FeatureExtractor(
        language="javascript",
        **train_config["javascript"]["feature_extractor"])
def test_train_review_analyzer_integration(self):
    """Integration test for review event."""
    config_json = json.dumps({QualityReportAnalyzer.name: get_config()})
    with AnalyzerContextManager(analyzer=QualityReportAnalyzer,
                                port=self.port,
                                db=self.db.name,
                                fs=self.fs.name):
        server.run("review", FROM_COMMIT, TO_COMMIT,
                   port=self.port,
                   git_dir=self.jquery_dir,
                   config_json=config_json)
def setUpClass(cls):
    """Parse a real JS file through babelfish and extract shared fixtures."""
    train_config = FormatAnalyzer._load_config(get_config())["train"]
    cls.extractor = FeatureExtractor(
        language="javascript",
        **train_config["javascript"]["feature_extractor"])
    js_path = Path(__file__).parent / "jquery.layout.js"
    with open(str(js_path), mode="rb") as handle:
        cls.code = handle.read()
    # Requires a babelfish daemon listening on the default local endpoint.
    cls.uast = bblfsh.BblfshClient("0.0.0.0:9432").parse(
        filename="", language="javascript", contents=cls.code).uast
    # NOTE(review): path="test.py" for JavaScript content looks odd but is
    # kept as-is — presumably the path is irrelevant to extraction; confirm.
    fake_file = FakeFile(path="test.py", content=cls.code, uast=cls.uast,
                         language="JavaScript")
    X, cls.y, (cls.vnodes_y, cls.vnodes, vnode_parents, node_parents) = \
        cls.extractor.extract_features([fake_file])
def setUpClass(cls):
    """Extract features from the small benchmark once for all tests."""
    cls.maxDiff = None
    fixtures_dir = Path(__file__).parent
    # str() is needed for Python 3.5
    with lzma.open(str(fixtures_dir / "benchmark_small.js.xz"),
                   mode="rt") as handle:
        js_source = handle.read()
    with lzma.open(str(fixtures_dir / "benchmark_small.js.uast.xz")) as handle:
        uast = bblfsh.Node.FromString(handle.read())
    fe_config = FormatAnalyzer._load_config(
        get_config())["train"]["javascript"]
    cls.feature_extractor = FeatureExtractor(
        language="javascript", label_composites=label_composites,
        **fe_config["feature_extractor"])
    cls.file = File(content=bytes(js_source, "utf-8"), uast=uast)
    cls.X, cls.y, (cls.vnodes_y, cls.vnodes, cls.vnode_parents,
                   cls.node_parents) = \
        cls.feature_extractor.extract_features([cls.file])
def test_evaluate_smoke_entry_integration(self):
    """End-to-end smoke test: generate the smoke dataset, shrink its index,
    evaluate it, and sanity-check the resulting report's shape."""
    input_path = os.path.join(os.path.dirname(generate_smoke.__file__),
                              "data", "js_smoke_init.tar.xz")
    generate_smoke.js_format_rules = {
        "equal_no_space_style": (" = ", "=")}
    with tempfile.TemporaryDirectory(
            prefix="test-smoke-eval-") as out_dir:
        generate_smoke.generate_smoke_entry(input_path, out_dir, force=True)
        with open(os.path.join(out_dir, "index.csv")) as index:
            index_lines = index.read().splitlines()
        self.assertEqual(len(index_lines), 5)
        self.assertEqual(
            set(os.listdir(out_dir)),
            {"index.csv", "nodejs", "jsquery", "freeCodeCamp", "react"})
        # Keep every third row only, then evaluate the reduced index.
        with open(os.path.join(out_dir, "index.csv"), "w") as index:
            index.write("\n".join(index_lines[::3]))
        report_dir = os.path.join(out_dir, "report")
        evaluate_smoke_entry(out_dir, report_dir, None, "0.0.0.0:9432",
                             get_config())
        report = pandas.read_csv(os.path.join(report_dir, "report.csv"))
        self.assertEqual(len(report), 4)
        self.assertEqual(len(report.columns), 10)
def setUp(self):
    """Create a fresh javascript feature extractor for each test."""
    train_config = FormatAnalyzer._load_config(get_config())["train"]
    fe_kwargs = train_config["javascript"]["feature_extractor"]
    self.extractor = FeatureExtractor(language="javascript", **fe_kwargs)
def test_generate_new_line(self):
    """Check line-by-line code generation against expected outputs per case.

    For every named case, the expected labels from ``cases`` are patched
    into the extracted label vector, the vnodes are regenerated, grouped by
    line, and each regenerated line is compared to the expectation. A value
    of ``None`` marks cases that delete lines, which are skipped.
    """
    self.maxDiff = None
    # Maps case name -> list of expected regenerated lines (None = skip).
    expected_res = {
        "nothing changed": [],
        "remove new line in the end of 4th line": None,
        "indentation in the beginning": [
            " import { makeToast } from '../../common/app/Toasts/redux';"],
        "remove indentation in the 4th line till the end": [
            " return Object.keys(flash)", " }"],
        "new line between 6th and 7th regular code lines": [
            "\n return messages.map(message => ({"],
        "new line in the middle of the 7th code line with indentation increase": [
            " return messages\n .map(message => ({", " })"],
        "new line in the middle of the 7th code line with indentation decrease": [
            " return messages\n .map(message => ({", " })"],
        "new line in the middle of the 7th code line without indentation increase": [
            " return messages\n .map(message => ({"],
        "change quotes": [
            'import { makeToast } from "../../common/app/Toasts/redux";'],
        "remove indentation decrease 11th line": [" }));"],
        "change indentation decrease to indentation increase 11th line": [
            " }));"],
        "change indentation decrease to indentation increase 11th line but keep the rest": [
            " }));", "})"],
    }
    base = Path(__file__).parent  # str() is needed for Python 3.5
    with lzma.open(str(base / "benchmark_small.js.xz"), mode="rt") as fin:
        contents = fin.read()
    with lzma.open(str(base / "benchmark_small.js.uast.xz")) as fin:
        uast = bblfsh.Node.FromString(fin.read())
    config = FormatAnalyzer._load_config(get_config())
    fe_config = config["train"]["javascript"]
    for case in expected_res:
        offsets, y_pred, _ = cases[case]
        # A fresh extractor per case keeps the cases independent.
        feature_extractor = FeatureExtractor(
            language="javascript",
            label_composites=label_composites,
            **fe_config["feature_extractor"])
        file = UnicodeFile(content=contents, uast=uast, path="", language="")
        X, y, (vnodes_y, vnodes, vnode_parents, node_parents) = \
            feature_extractor.extract_features([file])
        y_cur = deepcopy(y)
        for offset, yi in zip(offsets, y_pred):
            # Find the labeled vnode at this offset; if no vnode matches,
            # i falls through to the last index (loop ends without break).
            i = None
            for i, vnode in enumerate(vnodes_y):  # noqa: B007
                if offset == vnode.start.offset:
                    break
            y_cur[i] = yi
        code_generator = CodeGenerator(feature_extractor)
        pred_vnodes = code_generator.apply_predicted_y(
            vnodes, vnodes_y, list(range(len(vnodes_y))), FakeRules(y_cur))
        res = []
        # Group the regenerated vnodes per source line and render each line.
        for gln in FormatAnalyzer._group_line_nodes(
                y, y_cur, vnodes_y, pred_vnodes, [1] * len(y)):
            line, (line_y, line_y_pred, line_vnodes_y, line_vnodes,
                   line_rule_winners) = gln
            new_code_line = code_generator.generate_new_line(line_vnodes)
            res.append(new_code_line)
        if expected_res[case] is not None:
            # None means that we delete some lines. We do not handle this
            # properly now.
            self.assertEqual(res, expected_res[case], case)