Ejemplo n.º 1
0
 def test_eval_aggregate(self):
     """Check the aggregated train report printed for the jquery files.

     Runs print_reports with "aggregate" enabled, expects exactly one
     "# Train report for javascript" header in the collected output, and
     compares the metrics parsed from the text between "# Train report" and
     "# Test report" against reference values (decimal=15).
     """
     train_header = "# Train report for javascript"
     report_config = {
         "analyze": {
             "language_defaults": {
                 "uast_break_check": False,
             },
         },
         "aggregate": True,
     }
     pattern = os.path.join(self.jquery_dir, "**", "*")
     with Capturing() as captured:
         print_reports(
             input_pattern=pattern,
             bblfsh_addr=self.bblfsh,
             language=self.language,
             config=report_config,
             model_path=self.model_path,
         )
     text = "\n".join(captured)
     # Aggregation must collapse the per-file reports into a single one.
     self.assertEqual(text.count(train_header), 1)
     # Keep only the train section: from its header up to the test report.
     section_start = text.find("# Train report")
     section_end = text.find("# Test report")
     actual = _get_metrics(text[section_start:section_end])
     reference = (
         0.9305755395683454, 0.9305755395683454,
         0.8535136918508743, 0.9305755395683454,
         0.8903803131991053, 0.9171890465193006,
         2780, 3031,
     )
     assert_almost_equal(actual, reference, decimal=15)
Ejemplo n.º 2
0
 def test_eval(self):
     """Check the non-aggregated train and test reports for the jquery files.

     The first collected line must contain the train report header, a
     "### Classification report" line must be present, more than 100 lines
     must be produced, and both the train and the test report headers must
     occur 14 times each in the joined output.
     """
     train_header = "# Train report for javascript"
     test_header = "# Test report for javascript"
     report_config = {
         "analyze": {
             "language_defaults": {
                 "uast_break_check": False,
             },
         },
     }
     pattern = os.path.join(self.jquery_dir, "**", "*")
     with Capturing() as captured:
         print_reports(
             input_pattern=pattern,
             bblfsh_addr=self.bblfsh,
             language=self.language,
             model_path=self.model_path,
             config=report_config,
         )
     # Line-level checks are done on the collected sequence of lines.
     self.assertIn(train_header, captured[0])
     self.assertIn("### Classification report", captured)
     self.assertGreater(len(captured), 100)
     # Substring checks are done on the joined text.
     text = "\n".join(captured)
     self.assertNotEqual(text.find("Test report"), -1)
     for header in (train_header, test_header):
         self.assertEqual(text.count(header), 14)
Ejemplo n.º 3
0
    def test_eval_empty_input(self):
        """Test on empty folder - expect only model report.

        print_reports is pointed at a glob inside a fresh, empty temporary
        directory. The collected output must start with the model report
        header followed by "### Dump", must not mention any quality report,
        and must contain the expected model statistics both in the JSON data
        returned by _get_json_data and in the markdown table rows.
        """
        with tempfile.TemporaryDirectory() as folder:
            input_pattern = os.path.join(folder, "**", "*")
            with Capturing() as output:
                print_reports(input_pattern=input_pattern,
                              bblfsh=self.bblfsh,
                              language=self.language,
                              model_path=self.model_path,
                              config={"uast_break_check": False})
            self.assertEqual(output[:3], [
                "# Model report for https://github.com/jquery/jquery refs/heads/master "
                "c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b",
                "",
                "### Dump",
            ])
            self.assertGreater(len(output), 100)
            output = "\n".join(output)
            # Checked on the joined text on purpose: against the sequence of
            # lines, assertNotIn would only reject a line that is *exactly*
            # "# Quality report", so a full header line with a suffix would
            # slip through and the assertion could never fail.
            self.assertNotIn("# Quality report", output)
            data = _get_json_data(output)["javascript"]
            self.assertEqual(data["num_rules"], 1269)
            self.assertEqual(data["avg_rule_len"], 19.10401891252955)
            self.assertEqual(data["max_conf"], 0.9999756217002869)
            self.assertEqual(data["min_conf"], 0.19736842811107635)
            self.assertEqual(data["max_support"], 20528)
            self.assertEqual(data["min_support"], 16)
            # The same statistics must also appear as markdown table rows.
            lines = [
                "|Min support|16|",
                "|Max support|20528|",
                "|Min confidence|0.19736842811107635|",
                "|Max confidence|0.9999756217002869|",
            ]
            for line in lines:
                self.assertIn(line, output)
            num_rules, avg_len = _get_model_summary(output)
            self.assertEqual(num_rules, 1269)
            self.assertEqual(avg_len, 19.10401891252955)
Ejemplo n.º 4
0
 def test_eval_aggregate(self):
     """Check the aggregated quality report printed for the jquery files.

     With "aggregate" enabled exactly one collected line must equal the
     quality report header, and the metrics parsed from the text preceding
     the model report must match the reference Metrics (decimal=15).
     """
     quality_header = (
         "# Quality report for javascript / https://github.com/jquery/jquery refs/heads/master"
         " c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b")
     pattern = os.path.join(self.jquery_dir, "**", "*")
     with Capturing() as captured:
         print_reports(
             input_pattern=pattern,
             bblfsh=self.bblfsh,
             language=self.language,
             model_path=self.model_path,
             config={
                 "uast_break_check": False,
                 "aggregate": True,
             },
         )
     # Counted on the sequence of lines: the header must be a whole line.
     self.assertEqual(captured.count(quality_header), 1)
     text = "\n".join(captured)
     # Everything before the model report belongs to the quality report.
     model_report_start = text.find(
         "# Model report for https://github.com/jquery/jquery")
     actual = _get_metrics(text[:model_report_start])
     reference = Metrics(
         precision=0.9829633453794527,
         ppcr=0.6570556309362280,
         recall=0.9829633453794527,
         full_recall=0.6458616010854816,
         f1=0.9829633453794527,
         full_f1=0.7795291709314227,
         support=1937,
         full_support=2948,
     )
     assert_almost_equal(actual, reference, decimal=15)
Ejemplo n.º 5
0
 def test_eval_aggregate(self):
     """Check the aggregated train report printed for the jquery files.

     With "aggregate" enabled the joined output must contain the
     "# Train report for javascript" header exactly once, and the metrics
     parsed from the text preceding "# Model report for" must match the
     reference values (decimal=15).
     """
     train_header = "# Train report for javascript"
     report_config = {
         "analyze": {
             "language_defaults": {
                 "uast_break_check": False,
             },
         },
         "aggregate": True,
     }
     pattern = os.path.join(self.jquery_dir, "**", "*")
     with Capturing() as captured:
         print_reports(
             input_pattern=pattern,
             bblfsh=self.bblfsh,
             language=self.language,
             config=report_config,
             model_path=self.model_path,
         )
     text = "\n".join(captured)
     self.assertEqual(text.count(train_header), 1)
     # Drop the model report and everything after it before parsing metrics.
     model_report_start = text.find("# Model report for")
     actual = _get_metrics(text[:model_report_start])
     reference = (
         0.9292385057471264, 0.9292385057471264,
         0.8507070042749095, 0.9292385057471263,
         0.8882403433476395, 0.9154883262084841,
         2784, 3041,
     )
     assert_almost_equal(actual, reference, decimal=15)
Ejemplo n.º 6
0
 def test_no_model(self):
     """Check that print_reports raises ValueError for an unusable model.

     The input glob points inside an empty temporary directory and the
     model_path argument is a freshly created, empty NamedTemporaryFile
     object (the file object itself, not its name).
     """
     with tempfile.TemporaryDirectory() as empty_dir:
         pattern = os.path.join(empty_dir, "**", "*")
         with tempfile.NamedTemporaryFile() as empty_model, \
                 self.assertRaises(ValueError):
             print_reports(
                 input_pattern=pattern,
                 bblfsh_addr=self.bblfsh,
                 language=self.language,
                 model_path=empty_model,
                 config={"uast_break_check": False},
             )
Ejemplo n.º 7
0
 def test_eval_empty_input(self):
     """Check the output for an empty input folder: model and test report only.

     The collected output must start with the model report header followed
     by "### Dump", contain more than 100 lines, contain no "# Train report"
     and contain a "Test report". The part before the test report must carry
     the expected model statistics, both in the JSON data returned by
     _get_json_data and in the markdown table rows.
     """
     report_config = {
         "analyze": {
             "language_defaults": {
                 "uast_break_check": False,
             },
         },
         "aggregate": True,
     }
     expected_head = [
         "# Model report for file:///var/folders/kw/93jybvs16_954hytgsq6ld7r0000gn/T/"
         "top-repos-quality-repos-jigt1n8g/jquery HEAD "
         "dae5f3ce3d2df27873d01f0d9682f6a91ad66b87",
         "",
         "### Dump",
     ]
     expected_model = {
         "avg_rule_len": 11.214128035320089,
         "max_conf": 0.9999598264694214,
         "max_support": 21880,
         "min_conf": 0.9206641912460327,
         "min_support": 81,
         "num_rules": 453,
     }
     expected_rows = (
         "|Min support|81|",
         "|Max support|21880|",
         "|Min confidence|0.9206641912460327|",
         "|Max confidence|0.9999598264694214|",
     )
     with tempfile.TemporaryDirectory() as empty_dir:
         pattern = os.path.join(empty_dir, "**", "*")
         with Capturing() as captured:
             print_reports(
                 input_pattern=pattern,
                 bblfsh_addr=self.bblfsh,
                 language=self.language,
                 model_path=self.model_path,
                 config=report_config,
             )
         self.assertEqual(captured[:3], expected_head)
         self.assertGreater(len(captured), 100)
         text = "\n".join(captured)
         self.assertNotIn("# Train report", text)
         test_report_start = text.find("Test report")
         self.assertNotEqual(test_report_start, -1)
         # Only the model report part is inspected from here on.
         model_text = text[:test_report_start]
         self.assertEqual(_get_json_data(model_text)["javascript"], expected_model)
         for row in expected_rows:
             self.assertIn(row, model_text)
         num_rules, avg_len = _get_model_summary(model_text)
         self.assertEqual(num_rules, 453)
         self.assertAlmostEqual(avg_len, 11.214128035320089)
Ejemplo n.º 8
0
 def test_eval(self):
     """Check the non-aggregated quality and model reports for the jquery files.

     The collected lines must start with the quality report header, an empty
     line and "### Classification report". The quality header must occur as
     a whole line 14 times, "### Summary" and the model report header must be
     present as lines, more than 100 lines must be produced, and the JSON
     data extracted from the joined output must have a "javascript" key.
     """
     quality_header = (
         "# Quality report for javascript / https://github.com/jquery/jquery refs/heads/master"
         " c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b")
     model_header = (
         "# Model report for https://github.com/jquery/jquery refs/heads/master"
         " c2026b117d1ca5b2e42a52c7e2a8ae8988cf0d4b")
     pattern = os.path.join(self.jquery_dir, "**", "*")
     with Capturing() as captured:
         print_reports(
             input_pattern=pattern,
             bblfsh=self.bblfsh,
             language=self.language,
             model_path=self.model_path,
             config={"uast_break_check": False},
         )
     self.assertEqual(
         [quality_header, "", "### Classification report"], captured[:3])
     # One quality report is expected per evaluated file.
     self.assertEqual(captured.count(quality_header), 14)
     self.assertIn("### Summary", captured)
     self.assertIn(model_header, captured)
     self.assertGreater(len(captured), 100)
     self.assertIn("javascript", _get_json_data("\n".join(captured)))