def test_eval_aggregate(self):
    """Quality reports over normal input should be merged into a single train report."""
    train_header = "# Train report for javascript"
    pattern = os.path.join(self.jquery_dir, "**", "*")
    report_config = {
        "analyze": {"language_defaults": {"uast_break_check": False}},
        "aggregate": True,
    }
    with Capturing() as captured:
        print_reports(input_pattern=pattern,
                      bblfsh_addr=self.bblfsh,
                      language=self.language,
                      config=report_config,
                      model_path=self.model_path)
    text = "\n".join(captured)
    # Aggregation must collapse the per-file reports into exactly one.
    self.assertEqual(text.count(train_header), 1)
    # Extract only the train-report section for metric comparison.
    train_section = text[text.find("# Train report"):text.find("# Test report")]
    expected = (0.9305755395683454, 0.9305755395683454, 0.8535136918508743,
                0.9305755395683454, 0.8903803131991053, 0.9171890465193006,
                2780, 3031)
    assert_almost_equal(_get_metrics(train_section), expected, decimal=15)
def test_eval(self):
    """Normal input should produce 14 train and 14 test reports."""
    train_header = "# Train report for javascript"
    test_header = "# Test report for javascript"
    pattern = os.path.join(self.jquery_dir, "**", "*")
    with Capturing() as captured:
        print_reports(input_pattern=pattern,
                      bblfsh_addr=self.bblfsh,
                      language=self.language,
                      model_path=self.model_path,
                      config={"analyze": {"language_defaults": {"uast_break_check": False}}})
    # The very first captured line belongs to a train report.
    self.assertIn(train_header, captured[0])
    self.assertIn("### Classification report", captured)
    self.assertGreater(len(captured), 100)
    text = "\n".join(captured)
    # A test report section must be present somewhere in the output.
    self.assertNotEqual(text.find("Test report"), -1)
    self.assertEqual(text.count(train_header), 14)
    self.assertEqual(text.count(test_header), 14)
def test_eval_empty_input(self):
    """An empty input folder should yield only the model report."""
    with tempfile.TemporaryDirectory() as folder:
        pattern = os.path.join(folder, "**", "*")
        with Capturing() as captured:
            print_reports(input_pattern=pattern,
                          bblfsh=self.bblfsh,
                          language=self.language,
                          model_path=self.model_path,
                          config={"uast_break_check": False})
    # The report must open with the model header and its dump section.
    self.assertEqual(captured[:3], [
        "# Model report for https://github.com/jquery/jquery refs/heads/master "
        "c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b",
        "",
        "### Dump",
    ])
    # No files were analyzed, so no quality report may appear.
    self.assertNotIn("# Quality report", captured)
    self.assertGreater(len(captured), 100)
    text = "\n".join(captured)
    stats = _get_json_data(text)["javascript"]
    self.assertEqual(stats["num_rules"], 1269)
    self.assertEqual(stats["avg_rule_len"], 19.10401891252955)
    self.assertEqual(stats["max_conf"], 0.9999756217002869)
    self.assertEqual(stats["min_conf"], 0.19736842811107635)
    self.assertEqual(stats["max_support"], 20528)
    self.assertEqual(stats["min_support"], 16)
    # The markdown summary table must echo the same figures.
    for row in ("|Min support|16|",
                "|Max support|20528|",
                "|Min confidence|0.19736842811107635|",
                "|Max confidence|0.9999756217002869|"):
        self.assertIn(row, text)
    rule_count, mean_len = _get_model_summary(text)
    self.assertEqual(rule_count, 1269)
    self.assertEqual(mean_len, 19.10401891252955)
def test_eval_aggregate(self):
    """Quality reports over normal input should be aggregated into one."""
    quality_header = (
        "# Quality report for javascript / https://github.com/jquery/jquery refs/heads/master"
        " c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b")
    pattern = os.path.join(self.jquery_dir, "**", "*")
    with Capturing() as captured:
        print_reports(input_pattern=pattern,
                      bblfsh=self.bblfsh,
                      language=self.language,
                      model_path=self.model_path,
                      config={"uast_break_check": False, "aggregate": True})
    # Exactly one aggregated quality-report header line may be emitted.
    self.assertEqual(captured.count(quality_header), 1)
    text = "\n".join(captured)
    # Drop the trailing model report so only quality metrics are parsed.
    quality_section = text[:text.find(
        "# Model report for https://github.com/jquery/jquery")]
    expected = Metrics(precision=0.9829633453794527,
                       ppcr=0.6570556309362280,
                       recall=0.9829633453794527,
                       full_recall=0.6458616010854816,
                       f1=0.9829633453794527,
                       full_f1=0.7795291709314227,
                       support=1937,
                       full_support=2948)
    assert_almost_equal(_get_metrics(quality_section), expected, decimal=15)
def test_eval_aggregate(self):
    """Quality reports over normal input should be merged into a single train report."""
    train_header = "# Train report for javascript"
    pattern = os.path.join(self.jquery_dir, "**", "*")
    report_config = {
        "analyze": {"language_defaults": {"uast_break_check": False}},
        "aggregate": True,
    }
    with Capturing() as captured:
        print_reports(input_pattern=pattern,
                      bblfsh=self.bblfsh,
                      language=self.language,
                      config=report_config,
                      model_path=self.model_path)
    text = "\n".join(captured)
    # Aggregation must collapse the per-file reports into exactly one.
    self.assertEqual(text.count(train_header), 1)
    # Strip the trailing model report before extracting metrics.
    quality_section = text[:text.find("# Model report for")]
    expected = (0.9292385057471264, 0.9292385057471264, 0.8507070042749095,
                0.9292385057471263, 0.8882403433476395, 0.9154883262084841,
                2784, 3041)
    assert_almost_equal(_get_metrics(quality_section), expected, decimal=15)
def test_no_model(self):
    """Loading an empty model file should raise ValueError.

    Uses an empty temporary directory as input and an empty
    NamedTemporaryFile as the model: the model loader must reject it.
    """
    with tempfile.TemporaryDirectory() as folder:
        input_pattern = os.path.join(folder, "**", "*")
        with tempfile.NamedTemporaryFile() as empty_model:
            with self.assertRaises(ValueError):
                # Pass the file's *path* (.name), not the file object —
                # model_path is a filesystem path, and the original code
                # handed over the NamedTemporaryFile object itself.
                print_reports(input_pattern=input_pattern,
                              bblfsh_addr=self.bblfsh,
                              language=self.language,
                              model_path=empty_model.name,
                              config={"uast_break_check": False})
def test_eval_empty_input(self):
    """An empty input folder should yield only the model and test reports."""
    report_config = {
        "analyze": {"language_defaults": {"uast_break_check": False}},
        "aggregate": True,
    }
    with tempfile.TemporaryDirectory() as folder:
        pattern = os.path.join(folder, "**", "*")
        with Capturing() as captured:
            print_reports(input_pattern=pattern,
                          bblfsh_addr=self.bblfsh,
                          language=self.language,
                          model_path=self.model_path,
                          config=report_config)
    # NOTE(review): the expected header embeds a machine-specific temp path
    # recorded when the fixture model was trained — brittle but intentional here.
    self.assertEqual(captured[:3], [
        "# Model report for file:///var/folders/kw/93jybvs16_954hytgsq6ld7r0000gn/T/"
        "top-repos-quality-repos-jigt1n8g/jquery HEAD "
        "dae5f3ce3d2df27873d01f0d9682f6a91ad66b87",
        "",
        "### Dump",
    ])
    self.assertGreater(len(captured), 100)
    text = "\n".join(captured)
    # No files were analyzed, so no train report may appear.
    self.assertNotIn("# Train report", text)
    test_report_start = text.find("Test report")
    self.assertNotEqual(test_report_start, -1)
    # Parse only the part preceding the test report.
    model_section = text[:test_report_start]
    stats = _get_json_data(model_section)["javascript"]
    self.assertEqual(stats, {
        "avg_rule_len": 11.214128035320089,
        "max_conf": 0.9999598264694214,
        "max_support": 21880,
        "min_conf": 0.9206641912460327,
        "min_support": 81,
        "num_rules": 453,
    })
    # The markdown summary table must echo the same figures.
    for row in ("|Min support|81|",
                "|Max support|21880|",
                "|Min confidence|0.9206641912460327|",
                "|Max confidence|0.9999598264694214|"):
        self.assertIn(row, model_section)
    rule_count, mean_len = _get_model_summary(model_section)
    self.assertEqual(rule_count, 453)
    self.assertAlmostEqual(mean_len, 11.214128035320089)
def test_eval(self):
    """Normal input should produce 14 quality reports plus a model report."""
    quality_header = (
        "# Quality report for javascript / https://github.com/jquery/jquery refs/heads/master"
        " c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b")
    pattern = os.path.join(self.jquery_dir, "**", "*")
    with Capturing() as captured:
        print_reports(input_pattern=pattern,
                      bblfsh=self.bblfsh,
                      language=self.language,
                      model_path=self.model_path,
                      config={"uast_break_check": False})
    # Output opens with the first quality report and its classification section.
    self.assertEqual([
        quality_header,
        "",
        "### Classification report",
    ], captured[:3])
    # One quality report per analyzed file.
    self.assertEqual(captured.count(quality_header), 14)
    self.assertIn("### Summary", captured)
    self.assertIn(
        "# Model report for https://github.com/jquery/jquery refs/heads/master"
        " c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b", captured)
    self.assertGreater(len(captured), 100)
    self.assertIn("javascript", _get_json_data("\n".join(captured)))