Exemple #1
0
    def generate_file_fixes(self, data_service: DataService, changes: Sequence[Change],
                            ) -> Iterator[FileFix]:
        """
        Produce, file by file, the data that every later processing step relies on.

        The consumer may turn it into comments or into a performance report.

        :param data_service: Connection to the Lookout data retrieval service.
        :param changes: The list of changes in the pointed state.
        :return: Iterator with unrendered data per comment.
        """
        log = self._log
        base_by_lang = files_by_language(change.base for change in changes)
        head_by_lang = files_by_language(change.head for change in changes)
        files_per_lang = defaultdict(int)
        fixes_per_lang = defaultdict(int)
        for lang, head_files in head_by_lang.items():
            if lang not in self.model:
                log.warning("skipped %d written in %s. Rules for %s do not exist in model",
                            len(head_files), lang, lang)
                continue
            rules = self.model[lang]
            config = self.analyze_config[lang]
            # Narrow the rule set with the per-language thresholds from the analysis config.
            confident_rules = rules.filter_by_confidence(config["confidence_threshold"])
            rules = confident_rules.filter_by_support(config["support_threshold"])
            suitable_files = filter_files(
                head_files, rules.origin_config["line_length_limit"],
                rules.origin_config["overall_size_limit"], log=log)
            for file in suitable_files:
                files_per_lang[lang] += 1
                try:
                    prev_file = base_by_lang[lang][file.path]
                except KeyError:
                    # No base version is known for this language/path pair.
                    prev_file, changed_lines = None, None
                else:
                    changed_lines = sorted(chain(find_new_lines(prev_file, file),
                                                 find_deleted_lines(prev_file, file)))
                log.debug("%s %s", file.path, changed_lines)
                extractor = FeatureExtractor(language=lang,
                                             **rules.origin_config["feature_extractor"])
                extracted = extractor.extract_features([file], [changed_lines])
                if extracted is None:
                    # The extractor returned nothing: count the failure and, when the
                    # config asks for it, report it downstream as an error-only FileFix.
                    submit_event("%s.analyze.%s.parse_failures" % (self.name, lang), 1)
                    if config["report_parse_failures"]:
                        log.warning("Failed to parse %s", file.path)
                        yield FileFix(error="Failed to parse", head_file=file, language=lang,
                                      feature_extractor=extractor, base_file=prev_file,
                                      file_vnodes=[], line_fixes=[], y_pred_pure=None, y=None)
                    continue
                fixes, file_vnodes, y_pred_pure, y = self._generate_token_fixes(
                    file, extractor, extracted, data_service.get_bblfsh(), rules)
                log.debug("%s %d fixes", file.path, len(fixes))
                fixes_per_lang[lang] += len(fixes)
                yield FileFix(error="", head_file=file, language=lang,
                              feature_extractor=extractor, base_file=prev_file,
                              file_vnodes=file_vnodes, line_fixes=fixes,
                              y_pred_pure=y_pred_pure, y=y)
        # Per-language totals are reported only after the whole input has been consumed.
        for counted_lang, files_count in files_per_lang.items():
            submit_event("%s.analyze.%s.files" % (self.name, counted_lang), files_count)
        for counted_lang, fixes_count in fixes_per_lang.items():
            submit_event("%s.analyze.%s.fixes" % (self.name, counted_lang), fixes_count)
Exemple #2
0
class DataRequestsTests(unittest.TestCase, EventHandlers):
    COMMIT_FROM = "3ac2a59275902f7252404d26680e30cc41efb837"
    COMMIT_TO = "dce7fcba3d2151a0d5dc4b3a89cfc0911c96cf2b"

    def setUp(self):
        """
        Start the event listener and the SDK push thread, then wait for the first signal.

        The method blocks on ``setUpEvent`` until either an event handler or the failure
        path of ``run_data_service()`` sets it, and fails the test in the latter case.
        """
        self.setUpEvent = threading.Event()
        self.tearDownEvent = threading.Event()
        # This flag must exist before the worker thread starts: run_data_service() sets it
        # to False on error, and assigning True afterwards could overwrite that and hide
        # the failure.
        self.setUpWasSuccessful = True
        self.port = find_port()
        self.lookout_sdk = LookoutSDK()
        self.listener = EventListener("localhost:%d" % self.port, self).start()
        self.server_thread = threading.Thread(target=self.run_data_service)
        self.server_thread.start()
        self.data_service = DataService("localhost:10301")
        self.url = "file://" + str(
            Path(lookout.core.__file__).parent.parent.absolute())
        self.ref = "refs/heads/master"
        self.setUpEvent.wait()
        if not self.setUpWasSuccessful:
            # unittest does not call tearDown() when setUp() fails, so release the
            # listener, the data service and the worker thread explicitly.
            self.tearDown()
            self.fail("failed to setUp()")

    def tearDown(self):
        """Shut the data service down, unblock the pending event handler and stop the helpers."""
        try:
            self.data_service.shutdown()
        finally:
            # Always release the handler waiting on tearDownEvent; otherwise an error in
            # shutdown() would leave that handler, and the join() below, blocked.
            self.tearDownEvent.set()
            self.listener.stop()
            self.server_thread.join()

    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        """
        Handle a review event by releasing setUp() and blocking until tearDown() signals.

        :param request: The incoming review event; it is not inspected.
        :return: An empty EventResponse.
        """
        ready, finished = self.setUpEvent, self.tearDownEvent
        ready.set()
        finished.wait()
        return EventResponse()

    def process_push_event(self, request: PushEvent) -> EventResponse:
        """
        Handle a push event by releasing setUp() and blocking until tearDown() signals.

        :param request: The incoming push event; it is not inspected.
        :return: An empty EventResponse.
        """
        ready, finished = self.setUpEvent, self.tearDownEvent
        ready.set()
        finished.wait()
        return EventResponse()

    def run_data_service(self):
        """
        Thread target: call ``lookout_sdk.push`` for the COMMIT_FROM/COMMIT_TO pair.

        The git directory comes from LOOKOUT_SDK_ML_TESTS_GIT_DIR and defaults to ".".
        On any exception the error is printed, the setup is marked as failed and
        ``setUpEvent`` is set so that setUp() stops waiting.
        """
        git_dir = os.getenv("LOOKOUT_SDK_ML_TESTS_GIT_DIR", ".")
        try:
            self.lookout_sdk.push(self.COMMIT_FROM, self.COMMIT_TO, self.port, git_dir=git_dir)
        except Exception as exc:
            print(type(exc).__name__, exc)
            self.setUpWasSuccessful = False
            self.setUpEvent.set()

    def test_with_changed_uasts(self):
        """Run with_changed_uasts(unicode=False) on the commit pair and check the one change."""
        @with_changed_uasts(unicode=False)
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            base, head = changes[0].base, changes[0].head
            # The contents are expected to be empty bytes in this mode.
            self.assertEqual(base.content, b"")
            self.assertEqual(head.content, b"")
            node_module = bblfsh.Node.__module__
            self.assertEqual(type(base.uast).__module__, node_module)
            self.assertEqual(type(head.uast).__module__, node_module)
            self.assertEqual(base.path, head.path)
            self.assertEqual(base.path, "lookout/core/manager.py")
            self.assertEqual(base.language, "Python")
            self.assertEqual(head.language, "Python")

        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_rpc_error(self):
        """
        Make ``_get_channel`` raise grpc.RpcError and check how with_changed_uasts reacts.

        The error must propagate, the wrapped function must not run, and the thread-local
        channel must be None afterwards.
        """
        invocations = []
        genuine_get_channel = self.data_service._get_channel

        def failing_get_channel():
            # Call the real method first; the assertion expects it to have set the channel.
            genuine_get_channel()
            self.assertIsNotNone(
                self.data_service._data_request_local.channel)
            raise grpc.RpcError()

        self.data_service._get_channel = failing_get_channel

        @with_changed_uasts(unicode=False)
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            invocations.append(True)

        base_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_FROM)
        head_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_TO)
        with self.assertRaises(grpc.RpcError):
            func(self, base_ptr, head_ptr, self.data_service)
        self.assertFalse(invocations)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_changed_contents(self):
        """Run with_changed_contents(unicode=False) and check content sizes and empty UASTs."""
        @with_changed_contents(unicode=False)
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            base, head = changes[0].base, changes[0].head
            self.assertEqual(len(base.content), 5548)
            self.assertEqual(len(head.content), 5542)
            # The UASTs are expected to have no children in this mode.
            self.assertFalse(base.uast.children)
            self.assertFalse(head.uast.children)
            self.assertEqual(base.path, head.path)
            self.assertEqual(base.path, "lookout/core/manager.py")
            self.assertEqual(base.language, "Python")
            self.assertEqual(head.language, "Python")

        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_and_contents(self):
        """Run with_changed_uasts_and_contents(unicode=False): both contents and UASTs are set."""
        @with_changed_uasts_and_contents(unicode=False)
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            base, head = changes[0].base, changes[0].head
            self.assertEqual(len(base.content), 5548)
            self.assertEqual(len(head.content), 5542)
            node_module = bblfsh.Node.__module__
            self.assertEqual(type(base.uast).__module__, node_module)
            self.assertEqual(type(head.uast).__module__, node_module)
            self.assertEqual(base.path, head.path)
            self.assertEqual(base.path, "lookout/core/manager.py")
            self.assertEqual(base.language, "Python")
            self.assertEqual(head.language, "Python")

        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts(self):
        """Run with_uasts(unicode=False) at COMMIT_TO and check each of the 18 returned files."""
        known_languages = ("Python", "YAML", "Dockerfile", "Markdown",
                           "Jupyter Notebook", "Shell", "Text", "")

        @with_uasts(unicode=False)
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                # The contents are expected to be empty bytes in this mode.
                self.assertEqual(file.content, b"")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, known_languages)

        head_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_TO)
        func(self, head_ptr, None, self.data_service)

    def test_with_uasts_rpc_error(self):
        """
        Make ``_get_channel`` raise grpc.RpcError and check how with_uasts reacts.

        The error must propagate, the wrapped function must not run, and the thread-local
        channel must be None afterwards.
        """
        invocations = []
        genuine_get_channel = self.data_service._get_channel

        def failing_get_channel():
            # Call the real method first; the assertion expects it to have set the channel.
            genuine_get_channel()
            self.assertIsNotNone(
                self.data_service._data_request_local.channel)
            raise grpc.RpcError()

        self.data_service._get_channel = failing_get_channel

        @with_uasts(unicode=False)
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            invocations.append(True)

        head_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_TO)
        with self.assertRaises(grpc.RpcError):
            func(self, head_ptr, None, self.data_service)
        self.assertFalse(invocations)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_contents(self):
        """Run with_contents(unicode=False) at COMMIT_TO: contents present, UASTs childless."""
        known_languages = ("Python", "YAML", "Dockerfile", "Markdown",
                           "Jupyter Notebook", "Shell", "Text", "")

        @with_contents(unicode=False)
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                # __init__.py files are the only ones allowed to be empty.
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertFalse(file.uast.children)
                self.assertTrue(file.path)
                self.assertIn(file.language, known_languages)
            # At least one file must have a detected language.
            non_empty_langs = sum(1 for file in files if file.language)
            self.assertGreater(non_empty_langs, 0)

        head_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_TO)
        func(self, head_ptr, None, self.data_service)

    def test_with_uasts_and_contents(self):
        """Run with_uasts_and_contents(unicode=False) at COMMIT_TO and check all 18 files."""
        known_languages = ("Python", "YAML", "Dockerfile", "Markdown",
                           "Jupyter Notebook", "Shell", "Text", "")

        @with_uasts_and_contents(unicode=False)
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                # __init__.py files are the only ones allowed to be empty.
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, known_languages)

        head_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_TO)
        func(self, head_ptr, None, self.data_service)

    def test_babelfish(self):
        """Parse a one-line JavaScript snippet with parse_uast and expect a Node and no errors."""
        bblfsh_client = self.data_service.get_bblfsh()
        parsed, problems = parse_uast(bblfsh_client, "console.log('hi');", "hi.js",
                                      unicode=False)
        self.assertIsInstance(parsed, bblfsh.Node)
        self.assertEqual(len(problems), 0, str(problems))

    def test_check_bblfsh_driver_versions(self):
        """Check that two driver requirements are rejected and a third one is accepted."""
        for rejected in (["brainfuck>=1.0"], ["javascript<1.0"]):
            with self.assertRaises(UnsatisfiedDriverVersionError):
                self.data_service.check_bblfsh_driver_versions(rejected)
        # This requirement is expected to pass without raising.
        self.data_service.check_bblfsh_driver_versions(
            ["javascript>=1.3.0,<10.0"])

    def test_with_changed_uasts_unicode(self):
        """Run with_changed_uasts(unicode=True): the contents are empty ``str``, not bytes."""
        @with_changed_uasts(unicode=True)
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            base, head = changes[0].base, changes[0].head
            self.assertEqual(base.content, "")
            self.assertEqual(head.content, "")
            node_module = bblfsh.Node.__module__
            self.assertEqual(type(base.uast).__module__, node_module)
            self.assertEqual(type(head.uast).__module__, node_module)
            self.assertEqual(base.path, head.path)
            self.assertEqual(base.path, "lookout/core/manager.py")
            self.assertEqual(base.language, "Python")
            self.assertEqual(head.language, "Python")

        func(self,
             ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts_unicode(self):
        """Run with_uasts(unicode=True) at COMMIT_TO: every file must be a UnicodeFile."""
        known_languages = ("Python", "YAML", "Dockerfile", "Markdown",
                           "Jupyter Notebook", "Shell", "Text", "")

        @with_uasts(unicode=True)
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertIsInstance(file, UnicodeFile)
                # The contents are expected to be an empty ``str`` in this mode.
                self.assertEqual(file.content, "")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, known_languages)

        head_ptr = ReferencePointer(self.url, self.ref, self.COMMIT_TO)
        func(self, head_ptr, None, self.data_service)

    def test_babelfish_unicode(self):
        """
        Parse the same non-ASCII JavaScript snippet with unicode=True and unicode=False.

        Both calls must return a Node and equal error lists; the two UASTs are then handed
        to check_uast_transformation together with the raw bytes.
        """
        content = b"console.log('\xc3\x80');"
        text = content.decode()

        uast_uni, errors_uni = parse_uast(
            self.data_service.get_bblfsh(), text, "test.js", unicode=True)
        uast, errors = parse_uast(
            self.data_service.get_bblfsh(), text, "test.js", unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertIsInstance(uast_uni, bblfsh.Node)
        self.assertEqual(errors_uni, errors)
        check_uast_transformation(self, content, uast, uast_uni)