Esempio n. 1
0
def run_analyzers(args: argparse.Namespace):
    """
    Launch the service with the specified analyzers. Blocks until a KeyboardInterrupt.

    :param args: Parsed command line arguments.
    :return: None
    """
    log = logging.getLogger("run")
    model_repository = create_model_repo_from_args(args)
    log.info("Created %s", model_repository)
    if args.request_server == "auto":
        # Reuse the host part of the listen address with the default data port.
        data_request_address = "%s:10301" % args.server.split(":")[0]
    else:
        data_request_address = args.request_server
    data_service = DataService(data_request_address)
    log.info("Created %s", data_service)
    # Temporarily add the working directory so analyzers can be imported by
    # module name; restore sys.path even if an import fails.
    sys.path.append(os.getcwd())
    try:
        analyzers = [
            importlib.import_module(a).analyzer_class for a in args.analyzer
        ]
    finally:
        sys.path = sys.path[:-1]
    manager = AnalyzerManager(
        analyzers=analyzers,
        model_repository=model_repository,
        data_service=data_service,
    )
    log.info("Created %s", manager)
    listener = EventListener(address=args.server,
                             handlers=manager,
                             n_workers=args.workers)
    log.info("Created %s", listener)
    listener.start()
    log.info("Listening %s", args.server)
    try:
        # Blocks until interrupted (e.g. KeyboardInterrupt).
        listener.block()
    finally:
        # Always release resources, even on an exception during blocking.
        model_repository.shutdown()
        data_service.shutdown()
Esempio n. 2
0
 def setUp(self):
     """Start the event listener and the data-service driver thread for one test."""
     self.setUpEvent = threading.Event()
     self.tearDownEvent = threading.Event()
     self.port = server.find_port()
     # start() presumably returns the listener itself (fluent API) — confirm.
     self.listener = EventListener("localhost:%d" % self.port, self).start()
     # Background thread that drives events at the listener we just started.
     self.server_thread = threading.Thread(target=self.run_data_service)
     self.server_thread.start()
     self.data_service = DataService("localhost:10301")
     self.url = "file://" + str(Path(lookout.__file__).parent.absolute())
     self.ref = "refs/heads/master"
     # Block until an event handler sets setUpEvent — presumably done by the
     # enclosing class's process_*_event() methods; confirm against the class.
     self.setUpEvent.wait()
class AnalyzerContextManager:
    """Context manager for launching analyzer."""

    def __init__(self,
                 analyzer: Type[Analyzer],
                 port: int,
                 db: str,
                 fs: str,
                 init: bool = True,
                 data_request_address: str = "localhost:10301"):
        """
        Init analyzer: model_repository, data_service, arguments, etc.

        :param port: port to use for analyzer.
        :param db: path to sqlite database location.
        :param fs: location where to store results of launched analyzer.
        :param analyzer: analyzer class to use.
        :param init: To run `analyzer init` or not. \
                     If you want to reuse existing database set False.
        :param data_request_address: DataService GRPC endpoint to use.
        """
        self.analyzer = analyzer
        self.port = port
        self.init = init
        self.data_request_address = data_request_address
        # Arguments in the shape expected by create_model_repo_from_args().
        self._sql_alchemy_model_args = Namespace(
            db="sqlite:///%s" % db,
            fs=fs,
            cache_size="1G",
            cache_ttl="6h",
            db_kwargs={},
        )

    def __enter__(self) -> "AnalyzerContextManager":
        """
        Create the model repository and data service and start the events listener.

        :return: self, with `model_repository`, `data_service`, `manager` and \
                 `listener` attributes initialized.
        """
        self.model_repository = create_model_repo_from_args(
            self._sql_alchemy_model_args)
        if self.init:
            # Destructive (re-)initialization of the model database.
            self.model_repository.init()
        self.data_service = DataService(self.data_request_address)
        self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                       model_repository=self.model_repository,
                                       data_service=self.data_service)
        self.listener = EventListener(address="0.0.0.0:%d" % self.port,
                                      handlers=self.manager,
                                      n_workers=1)
        self.listener.start()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """Stop the listener and shut down the model repository and data service."""
        try:
            self.listener.stop()
        finally:
            # Ensure both services are released even if stopping the listener
            # (or shutting down the repository) raises.
            try:
                self.model_repository.shutdown()
            finally:
                self.data_service.shutdown()
 def __enter__(self) -> "AnalyzerContextManager":
     """
     Create the model repository and data service and start the events listener.

     :return: self, with `model_repository`, `data_service`, `manager` and \
              `listener` attributes initialized.
     """
     self.model_repository = create_model_repo_from_args(
         self._sql_alchemy_model_args)
     if self.init:
         # Destructive (re-)initialization of the model database.
         self.model_repository.init()
     self.data_service = DataService(self.data_request_address)
     self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                    model_repository=self.model_repository,
                                    data_service=self.data_service)
     # Listen on all interfaces at the configured port with a single worker.
     self.listener = EventListener(address="0.0.0.0:%d" % self.port,
                                   handlers=self.manager,
                                   n_workers=1)
     self.listener.start()
     return self
Esempio n. 5
0
    def __init__(self,
                 port: int,
                 db: str,
                 fs: str,
                 config: str = "",
                 analyzer: Union[
                     str, Sequence[str],
                     Iterable[Type[Analyzer]]] = "lookout.style.format",
                 init: bool = True) -> None:
        """
        Init analyzer: model_repository, data_service, arguments, etc.

        :param port: port to use for analyzer.
        :param db: database location.
        :param fs: location where to store results of launched analyzer.
        :param config: Path to the configuration file with option defaults. If empty - skip.
        :param analyzer: analyzer(s) to use: a module name, a sequence of module \
                         names, or an iterable of analyzer classes.
        :param init: To run `analyzer init` or not. \
                     If you want to reuse existing database set False.
        """
        self.port = port
        self.db = db
        self.fs = fs
        self.config_path = config  # mimic TestAnalyzer - not used so far
        # Normalize `analyzer` to a list of analyzer classes. The previous code
        # only assigned self.analyzer for str/type inputs, so passing a sequence
        # (allowed by the annotation) raised AttributeError below.
        if isinstance(analyzer, (str, type)):
            analyzer = [analyzer]
        self.analyzer = list(analyzer)
        if isinstance(self.analyzer[0], str):
            # Module names: resolve each to its exported analyzer_class.
            self.analyzer = [
                importlib.import_module(a).analyzer_class
                for a in self.analyzer
            ]

        # Arguments in the shape expected by create_model_repo_from_args().
        self.args = Namespace()
        self.args.db = "sqlite:///%s" % self.db
        self.args.fs = self.fs
        self.args.cache_size = "1G"
        self.args.cache_ttl = "6h"
        self.args.db_kwargs = {}
        self.args.workers = 1
        # initialize model repository
        self.model_repository = create_model_repo_from_args(self.args)
        if init:
            self.model_repository.init()
        # initialize a new instance of DataService
        data_request_address = "0.0.0.0:10301"
        self.data_service = DataService(data_request_address)
Esempio n. 6
0
 def setUp(self):
     """Start the event listener and push events from a background thread."""
     self.setUpEvent = threading.Event()
     self.tearDownEvent = threading.Event()
     self.port = find_port()
     self.lookout_sdk = LookoutSDK()
     # start() presumably returns the listener itself (fluent API) — confirm.
     self.listener = EventListener("localhost:%d" % self.port, self).start()
     # Background thread that drives events at the listener we just started.
     self.server_thread = threading.Thread(target=self.run_data_service)
     self.server_thread.start()
     self.data_service = DataService("localhost:10301")
     self.url = "file://" + str(
         Path(lookout.core.__file__).parent.parent.absolute())
     self.ref = "refs/heads/master"
     # run_data_service() flips this to False if the push fails.
     self.setUpWasSuccessful = True
     # Block until an event handler (or a failed push) sets setUpEvent.
     self.setUpEvent.wait()
     if not self.setUpWasSuccessful:
         self.fail("failed to setUp()")
Esempio n. 7
0
    def generate_file_fixes(self, data_service: DataService, changes: Sequence[Change],
                            ) -> Iterator[FileFix]:
        """
        Generate all data required for any type of further processing.

        Next processing can be comment generation or performance report generation.

        :param data_service: Connection to the Lookout data retrieval service.
        :param changes: The list of changes in the pointed state.
        :return: Iterator with unrendered data per comment.
        """
        log = self._log
        # Index the base/head sides of the changes by detected language.
        base_files_by_lang = files_by_language(c.base for c in changes)
        head_files_by_lang = files_by_language(c.head for c in changes)
        # Per-language counters reported via submit_event() at the end.
        processed_files_counter = defaultdict(int)
        processed_fixes_counter = defaultdict(int)
        for lang, head_files in head_files_by_lang.items():
            if lang not in self.model:
                # No trained rules for this language — skip all of its files.
                log.warning("skipped %d written in %s. Rules for %s do not exist in model",
                            len(head_files), lang, lang)
                continue
            rules = self.model[lang]
            config = self.analyze_config[lang]
            # Keep only rules that clear the configured confidence/support bars.
            rules = rules.filter_by_confidence(config["confidence_threshold"]) \
                .filter_by_support(config["support_threshold"])
            for file in filter_files(head_files, rules.origin_config["line_length_limit"],
                                     rules.origin_config["overall_size_limit"], log=log):
                processed_files_counter[lang] += 1
                try:
                    prev_file = base_files_by_lang[lang][file.path]
                except KeyError:
                    # New file: no base revision, analyze all lines (lines=None).
                    prev_file = None
                    lines = None
                else:
                    # Restrict analysis to lines added or deleted since prev_file.
                    lines = sorted(chain.from_iterable((
                        find_new_lines(prev_file, file),
                        find_deleted_lines(prev_file, file),
                    )))
                log.debug("%s %s", file.path, lines)
                fe = FeatureExtractor(language=lang, **rules.origin_config["feature_extractor"])
                feature_extractor_output = fe.extract_features([file], [lines])
                if feature_extractor_output is None:
                    # Parsing failed; optionally surface it as an error FileFix.
                    submit_event("%s.analyze.%s.parse_failures" % (self.name, lang), 1)
                    if config["report_parse_failures"]:
                        log.warning("Failed to parse %s", file.path)
                        yield FileFix(error="Failed to parse", head_file=file, language=lang,
                                      feature_extractor=fe, base_file=prev_file, file_vnodes=[],
                                      line_fixes=[], y_pred_pure=None, y=None)
                else:
                    fixes, file_vnodes, y_pred_pure, y = self._generate_token_fixes(
                        file, fe, feature_extractor_output, data_service.get_bblfsh(), rules)
                    log.debug("%s %d fixes", file.path, len(fixes))
                    processed_fixes_counter[lang] += len(fixes)
                    yield FileFix(error="", head_file=file, language=lang, feature_extractor=fe,
                                  base_file=prev_file, file_vnodes=file_vnodes, line_fixes=fixes,
                                  y_pred_pure=y_pred_pure, y=y)
        # Report aggregate per-language statistics.
        for key, val in processed_files_counter.items():
            submit_event("%s.analyze.%s.files" % (self.name, key), val)
        for key, val in processed_fixes_counter.items():
            submit_event("%s.analyze.%s.fixes" % (self.name, key), val)
Esempio n. 8
0
    def run(self,
            ptr_from: ReferencePointer,
            data_service_head: DataService,
            data_service_base: Optional[DataService] = None
            ) -> Iterable[FileFix]:
        """
        Run `generate_file_fixes` for all files in ptr_from revision.

        :param ptr_from: Git repository state pointer to the base revision.
        :param data_service_head: Connection to the Lookout data retrieval service to get \
                                the new files.
        :param data_service_base: Connection to the Lookout data retrieval service to get \
                                  the initial files. If it is None, we assume the empty contents.
        :return: Generator of fixes for each file.
        """
        def fetch(service: DataService) -> list:
            # Materialize the files at ptr_from with contents and UASTs attached.
            return list(request_files(service.get_data(),
                                      ptr_from,
                                      contents=True,
                                      uast=True,
                                      unicode=True))

        files_head = fetch(data_service_head)
        if data_service_base is None:
            # No base service available: treat every file as previously empty.
            files_base = [File(path=f.path) for f in files_head]
        else:
            files_base = fetch(data_service_base)
        changes = [self.Changes(base, head)
                   for base, head in zip(files_base, files_head)]
        return self.generate_file_fixes(data_service_head, changes)
Esempio n. 9
0
 def __enter__(self) -> "AnalyzerContextManager":
     """
     Create the context and run the events listener.
     """
     self.model_repository = create_model_repo_from_args(self._sql_alchemy_model_args)
     if self.init:
         # Destructive (re-)initialization of the model database.
         self.model_repository.init()
     self.data_service = DataService(self.data_request_address)
     self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                    model_repository=self.model_repository,
                                    data_service=self.data_service)
     # The port chosen earlier may have been taken in the meantime; re-pick.
     if not check_port_free(self._port):
         self._port = find_port()
     self.listener = EventListener(address="0.0.0.0:%d" % self._port, handlers=self.manager,
                                   n_workers=1)
     self.listener.start()
     self._lookout_sdk = LookoutSDK()
     return self
Esempio n. 10
0
    def run(self, ptr: ReferencePointer,
            data_service: DataService) -> Iterable[TypoFix]:
        """
        Run `generate_typos_fixes` for all lines and all files in `ptr_from` revision.

        :param ptr: Git repository state pointer to the revision that should be analyzed.
        :param data_service: Connection to the Lookout data retrieval service to get the files.
        :return: Generator of fixes for each file.
        """
        # Find the single configured file in the revision; for-else raises if
        # the loop finishes without a break (file not present).
        for file in request_files(data_service.get_data(),
                                  ptr,
                                  contents=True,
                                  uast=True,
                                  unicode=False):
            if file.path == self.config["analyze"]["filepath"]:
                break
        else:
            raise ValueError("No such file %s in %s" %
                             (self.config["analyze"]["filepath"], ptr))
        # Config stores a 0-based line; convert to 1-based.
        line = self.config["analyze"]["line"] + 1
        try:
            # Force the "new lines" detection to report exactly this line;
            # reset in the finally block below.
            self._find_new_lines_return_value = [line]
            typos_fixes = list(
                self.generate_typos_fixes([Change(head=file, base=file)]))
            line_identifiers = self._get_identifiers(file.uast, [line])
            line_identifiers = [n.token for n in line_identifiers]
            identifiers_number = len(set(line_identifiers))
            if not identifiers_number:
                raise ValueError("No identifiers for %s:%d in %s" %
                                 (self.config["analyze"]["filepath"],
                                  self.config["analyze"]["line"] + 1, ptr))
            assert self.config["analyze"]["wrong_id"] in line_identifiers, \
                "Identifier %s was not found in the %s:%d.\nLine identifiers are %s" % (
                    self.config["analyze"]["wrong_id"], self.config["analyze"]["filepath"], line,
                    line_identifiers)
            if typos_fixes:
                return typos_fixes
            # No fixes found: return a single placeholder TypoFix so callers
            # still receive the identifier count for the line.
            return [
                TypoFix(content=file.content.decode("utf-8", "replace"),
                        path=file.path,
                        line_number=0,
                        identifier="",
                        candidates=[],
                        identifiers_number=identifiers_number)
            ]
        finally:
            # Always undo the instrumentation set above.
            self._find_new_lines_return_value = None
Esempio n. 11
0
    def run(self, ptr: ReferencePointer,
            data_service: DataService) -> Iterable[TypoFix]:
        """
        Run `generate_typos_fixes` for all lines and all files in `ptr_from` revision.

        :param ptr: Git repository state pointer to the revision that should be analyzed.
        :param data_service: Connection to the Lookout data retrieval service to get the files.
        :return: Generator of fixes for each file.
        """
        target_path = self.config["filepath_to_analyze"]
        files = request_files(data_service.get_data(),
                              ptr,
                              contents=True,
                              uast=True,
                              unicode=False)
        # Scan the stream for the configured file; None means it is absent.
        file = next((f for f in files if f.path == target_path), None)
        if file is None:
            raise ValueError("No such file %s in %s" %
                             (self.config["filepath_to_analyze"], ptr))

        # Compare against an empty base so every line counts as changed.
        empty_base = File(path=file.path, language=file.language)
        typos_fixes = list(
            self.generate_typos_fixes([UnicodeChange(head=file, base=empty_base)]))
        if typos_fixes:
            return typos_fixes
        identifiers_number = len(self._get_identifiers(file.uast, []))
        if not identifiers_number:
            raise ValueError("No identifiers for file %s in %s" %
                             (self.config["filepath_to_analyze"], ptr))
        # No fixes found: return one placeholder TypoFix carrying the
        # identifier count for the whole file.
        placeholder = TypoFix(content=file.content.decode("utf-8", "replace"),
                              path=file.path,
                              line_number=0,
                              identifier="",
                              candidates=[],
                              identifiers_number=identifiers_number)
        return [placeholder]
Esempio n. 12
0
class DataRequestsTests(unittest.TestCase, EventHandlers):
    """
    Integration tests for the data-request decorators against a live listener.

    setUp() starts an EventListener handled by this instance and pushes the
    COMMIT_FROM..COMMIT_TO range at it from a background thread via LookoutSDK;
    the process_*_event() handlers below synchronize setUp()/tearDown() through
    threading events.
    """

    # Commit range pushed by run_data_service(); the repository is taken from
    # the LOOKOUT_SDK_ML_TESTS_GIT_DIR environment variable (default ".").
    COMMIT_FROM = "3ac2a59275902f7252404d26680e30cc41efb837"
    COMMIT_TO = "dce7fcba3d2151a0d5dc4b3a89cfc0911c96cf2b"

    def setUp(self):
        """Start the event listener and push events from a background thread."""
        self.setUpEvent = threading.Event()
        self.tearDownEvent = threading.Event()
        self.port = find_port()
        self.lookout_sdk = LookoutSDK()
        # start() presumably returns the listener itself (fluent API) — confirm.
        self.listener = EventListener("localhost:%d" % self.port, self).start()
        self.server_thread = threading.Thread(target=self.run_data_service)
        self.server_thread.start()
        self.data_service = DataService("localhost:10301")
        self.url = "file://" + str(
            Path(lookout.core.__file__).parent.parent.absolute())
        self.ref = "refs/heads/master"
        # run_data_service() flips this to False if the push fails.
        self.setUpWasSuccessful = True
        # Block until a handler (or a failed push) sets setUpEvent.
        self.setUpEvent.wait()
        if not self.setUpWasSuccessful:
            self.fail("failed to setUp()")

    def tearDown(self):
        """Release the data service and unblock the handler/driver threads."""
        self.data_service.shutdown()
        # Lets the handler blocked in process_*_event() return its response.
        self.tearDownEvent.set()
        self.listener.stop()
        self.server_thread.join()

    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        """Signal setUp() that an event arrived, then wait for tearDown()."""
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def process_push_event(self, request: PushEvent) -> EventResponse:
        """Signal setUp() that an event arrived, then wait for tearDown()."""
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def run_data_service(self):
        """Push the test commit range at the listener; record failures for setUp()."""
        try:
            self.lookout_sdk.push(self.COMMIT_FROM,
                                  self.COMMIT_TO,
                                  self.port,
                                  git_dir=os.getenv(
                                      "LOOKOUT_SDK_ML_TESTS_GIT_DIR", "."))
        except Exception as e:
            print(type(e).__name__, e)
            self.setUpWasSuccessful = False
            # Unblock setUp() so it can report the failure instead of hanging.
            self.setUpEvent.set()

    def test_with_changed_uasts(self):
        """with_changed_uasts(unicode=False) yields UASTs but empty contents."""
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, b"")
            self.assertEqual(change.head.content, b"")
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_rpc_error(self):
        """A gRPC failure propagates and the cached channel is dropped."""
        called = False

        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            # Wrap _get_channel so it raises after the channel is cached.
            def wrapped():
                f()
                self.assertIsNotNone(
                    self.data_service._data_request_local.channel)
                raise grpc.RpcError()

            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)
        func = with_changed_uasts(unicode=False)(func)

        self.assertRaises(
            grpc.RpcError, func, self,
            ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
            ReferencePointer(self.url, self.ref, self.COMMIT_TO),
            self.data_service)
        # The wrapped handler must never run on a failed request.
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_changed_contents(self):
        """with_changed_contents yields contents but UASTs with no children."""
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertFalse(change.base.uast.children)
            self.assertFalse(change.head.uast.children)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_and_contents(self):
        """with_changed_uasts_and_contents yields both contents and UASTs."""
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts_and_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts(self):
        """with_uasts yields every file with a UAST and empty contents."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertEqual(file.content, b"")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_with_uasts_rpc_error(self):
        """A gRPC failure propagates and the cached channel is dropped."""
        called = False

        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            # Wrap _get_channel so it raises after the channel is cached.
            def wrapped():
                f()
                self.assertIsNotNone(
                    self.data_service._data_request_local.channel)
                raise grpc.RpcError()

            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)

        func = with_uasts(unicode=False)(func)
        self.assertRaises(grpc.RpcError, func, self,
                          ReferencePointer(self.url, self.ref, self.COMMIT_TO),
                          None, self.data_service)
        # The wrapped handler must never run on a failed request.
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_contents(self):
        """with_contents yields contents but UASTs with no children."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            non_empty_langs = 0
            for file in files:
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertFalse(file.uast.children)
                self.assertTrue(file.path)
                if file.language:
                    non_empty_langs += 1
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))
            self.assertGreater(non_empty_langs, 0)

        func = with_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_with_uasts_and_contents(self):
        """with_uasts_and_contents yields both contents and UASTs."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts_and_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_babelfish(self):
        """parse_uast() returns a bblfsh.Node and no errors for valid JS."""
        uast, errors = parse_uast(self.data_service.get_bblfsh(),
                                  "console.log('hi');",
                                  "hi.js",
                                  unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertEqual(len(errors), 0, str(errors))

    def test_check_bblfsh_driver_versions(self):
        """Unsatisfiable driver requirements raise; satisfiable ones pass."""
        self.assertRaises(UnsatisfiedDriverVersionError,
                          self.data_service.check_bblfsh_driver_versions,
                          ["brainfuck>=1.0"])
        self.assertRaises(UnsatisfiedDriverVersionError,
                          self.data_service.check_bblfsh_driver_versions,
                          ["javascript<1.0"])
        self.data_service.check_bblfsh_driver_versions(
            ["javascript>=1.3.0,<10.0"])

    def test_with_changed_uasts_unicode(self):
        """unicode=True: contents are str (here empty "") instead of bytes."""
        def func(imposter, ptr_from: ReferencePointer,
                 ptr_to: ReferencePointer, data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, "")
            self.assertEqual(change.head.content, "")
            self.assertEqual(
                type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(
                type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=True)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts_unicode(self):
        """unicode=True: files arrive as UnicodeFile with str contents."""
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertIsInstance(file, UnicodeFile)
                self.assertEqual(file.content, "")
                self.assertEqual(
                    type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language,
                              ("Python", "YAML", "Dockerfile", "Markdown",
                               "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(unicode=True)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_babelfish_unicode(self):
        """Unicode and byte parses of the same non-ASCII source must agree."""
        # UTF-8 encoding of "console.log('À');".
        content = b"console.log('\xc3\x80');"

        uast_uni, errors_uni = parse_uast(self.data_service.get_bblfsh(),
                                          content.decode(),
                                          "test.js",
                                          unicode=True)
        uast, errors = parse_uast(self.data_service.get_bblfsh(),
                                  content.decode(),
                                  "test.js",
                                  unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertIsInstance(uast_uni, bblfsh.Node)
        self.assertEqual(errors_uni, errors)
        check_uast_transformation(self, content, uast, uast_uni)
Esempio n. 13
0
class AnalyzerContextManager:
    """Context manager that runs an analyzer behind an events listener."""

    def __init__(self, analyzer: Type[Analyzer], db: str, fs: str,
                 init: bool = True, data_request_address: str = "localhost:10301"):
        """
        Initialization.

        :param analyzer: analyzer class to use.
        :param db: path to an SQLite database with model metadata.
        :param fs: location where to store the trained model.
        :param init: Value indicating whether to run the destructive database initialization \
                     or not. If you want to reuse an existing database set False.
        :param data_request_address: DataService GRPC endpoint to use.
        """
        self.analyzer = analyzer
        self.init = init
        # Picked eagerly; re-checked for availability in __enter__.
        self._port = find_port()
        self.data_request_address = data_request_address
        self._sql_alchemy_model_args = Namespace(
            db="sqlite:///%s" % db,
            fs=fs,
            cache_size="1G",
            cache_ttl="6h",
            db_kwargs={},
        )
        # Non-None only between __enter__ and __exit__; serves as the
        # "are we inside a `with` statement" guard for review()/push().
        self._lookout_sdk = None

    def __enter__(self) -> "AnalyzerContextManager":
        """
        Create the context and run the events listener.
        """
        self.model_repository = create_model_repo_from_args(self._sql_alchemy_model_args)
        if self.init:
            # Destructive: re-creates the model metadata storage.
            self.model_repository.init()
        self.data_service = DataService(self.data_request_address)
        self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                       model_repository=self.model_repository,
                                       data_service=self.data_service)
        # The port chosen in __init__ may have been taken since then.
        if not check_port_free(self._port):
            self._port = find_port()
        self.listener = EventListener(address="0.0.0.0:%d" % self._port, handlers=self.manager,
                                      n_workers=1)
        self.listener.start()
        self._lookout_sdk = LookoutSDK()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """
        Stop the events listener and shutdown the context.
        """
        self._lookout_sdk = None
        self.listener.stop()
        self.model_repository.shutdown()
        self.data_service.shutdown()

    def _require_sdk(self, method_name: str) -> "LookoutSDK":
        """
        Return the active LookoutSDK instance.

        :param method_name: name of the public method used in the error message.
        :raise AttributeError: if called outside of a `with` statement.
        """
        # Fix: review() and push() previously raised with inconsistent
        # messages ("inside `with`" vs "inside `with` statement").
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.%s() is available only inside `with` statement"
                % method_name)
        return self._lookout_sdk

    def review(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str]=None,
               log_level: Optional[str]=None, config_json: Optional[dict]=None) \
            -> subprocess.CompletedProcess:
        """
        Proxy for LookoutSDK.review().

        Triggers a review event and effectively calls the underlying analyzer's `analyze()`.
        Read parameters description in `LookoutSDK.review()`

        :raise AttributeError: if called outside of a `with` statement.
        """
        return self._require_sdk("review").review(
            fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
            log_level=log_level, config_json=config_json)

    def push(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str]=None,
             log_level: Optional[str]=None, config_json: Optional[dict]=None) \
            -> subprocess.CompletedProcess:
        """
        Proxy for LookoutSDK.push().

        Triggers a push event and effectively calls the underlying analyzer's `train()`.
        Read parameters description in `LookoutSDK.push()`

        :raise AttributeError: if called outside of a `with` statement.
        """
        return self._require_sdk("push").push(
            fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
            log_level=log_level, config_json=config_json)
class AnalyzerContextManager:
    """Context manager for launching analyzer."""

    def __init__(self, analyzer: Type[Analyzer], db: str, fs: str,
                 init: bool = True, data_request_address: str = "localhost:10301"):
        """
        Initialization.

        :param analyzer: analyzer class to use.
        :param db: path to an SQLite database with model metadata.
        :param fs: location where to store the trained model.
        :param init: Value indicating whether to run the destructive database initialization \
                     or not. If you want to reuse an existing database set False.
        :param data_request_address: DataService GRPC endpoint to use.
        """
        self.analyzer = analyzer
        self.init = init
        # Picked now, re-validated for availability in __enter__.
        self._port = find_port()
        self.data_request_address = data_request_address
        self._sql_alchemy_model_args = Namespace(
            db="sqlite:///%s" % db,
            fs=fs,
            cache_size="1G",
            cache_ttl="6h",
            db_kwargs={},
        )
        # Non-None only between __enter__ and __exit__; review()/push() use it
        # as the "inside `with` statement" guard.
        self._lookout_sdk = None

    def __enter__(self) -> "AnalyzerContextManager":
        """
        Create the context and run the events listener.
        """
        self.model_repository = create_model_repo_from_args(self._sql_alchemy_model_args)
        if self.init:
            # Destructive: re-initializes the model metadata storage.
            self.model_repository.init()
        self.data_service = DataService(self.data_request_address)
        self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                       model_repository=self.model_repository,
                                       data_service=self.data_service)
        # The port chosen in __init__ may have been taken in the meantime.
        if not check_port_free(self._port):
            self._port = find_port()
        self.listener = EventListener(address="0.0.0.0:%d" % self._port, handlers=self.manager,
                                      n_workers=1)
        self.listener.start()
        self._lookout_sdk = LookoutSDK()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """
        Stop the events listener and shutdown the context.
        """
        self._lookout_sdk = None
        self.listener.stop()
        self.model_repository.shutdown()
        self.data_service.shutdown()

    def review(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str]=None,
               log_level: Optional[str]=None, config_json: Optional[dict]=None) \
            -> Iterator[Comment]:
        """
        Proxy for LookoutSDK.review().

        Triggers a review event and effectively calls the underlying analyzer's `analyze()`.
        Read parameters description in `LookoutSDK.review()`

        :return: Iterator over the comments generated by the triggered analyzer. \
                 Comment confidence is not provided because of lookout-sdk limitations.
        :raise AttributeError: if called outside of a `with` statement.
        """
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.review() is available only inside `with`")
        process = self._lookout_sdk.review(fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
                                           log_level=log_level, config_json=config_json)

        def comments_iterator(logs):
            # TODO (zurk): Use stdout and remove ifs when the lookout issue is solved:
            # https://github.com/src-d/lookout/issues/601
            # Each stderr line is parsed as a JSON log record; the "msg" field
            # distinguishes line / file / global comments.
            # NOTE(review): a non-JSON stderr line would raise here — assumes
            # lookout-sdk emits JSON-only logs; confirm.
            for log_line in logs.splitlines():
                log_entry = json.loads(log_line.decode())
                if log_entry["msg"] == "line comment":
                    yield Comment(
                        file=log_entry["file"], text=log_entry["text"], line=log_entry["line"])
                if log_entry["msg"] == "file comment":
                    yield Comment(file=log_entry["file"], text=log_entry["text"])
                if log_entry["msg"] == "global comment":
                    yield Comment(text=log_entry["text"])

        return comments_iterator(process.stderr)

    def push(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str]=None,
             log_level: Optional[str]=None, config_json: Optional[dict]=None) \
            -> subprocess.CompletedProcess:
        """
        Proxy for LookoutSDK.push().

        Triggers a push event and effectively calls the underlying analyzer's `train()`.
        Read parameters description in `LookoutSDK.push()`

        :raise AttributeError: if called outside of a `with` statement.
        """
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.push() is available only inside `with` statement")
        return self._lookout_sdk.push(fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
                                      log_level=log_level, config_json=config_json)
# Esempio n. 15
# 0
class DataRequestsTests(unittest.TestCase, EventHandlers):
    def setUp(self):
        # Handshake events with the fake lookout server thread: setUpEvent is
        # set by the first event handler once the server is up; tearDownEvent
        # releases the handlers that block in process_*_event.
        self.setUpEvent = threading.Event()
        self.tearDownEvent = threading.Event()
        self.port = server.find_port()
        self.listener = EventListener("localhost:%d" % self.port, self).start()
        self.server_thread = threading.Thread(target=self.run_data_service)
        self.server_thread.start()
        # NOTE(review): 10301 is a fixed DataService gRPC endpoint, distinct
        # from self.port used by the listener — confirm against the SDK setup.
        self.data_service = DataService("localhost:10301")
        self.url = "file://" + str(Path(lookout.__file__).parent.absolute())
        self.ref = "refs/heads/master"
        # Block until the server thread has delivered the first event.
        self.setUpEvent.wait()

    def tearDown(self):
        # Shut down in reverse order of setUp: close the data channel, release
        # the handlers blocked on tearDownEvent, stop the listener, then join
        # the server thread.
        self.data_service.shutdown()
        self.tearDownEvent.set()
        self.listener.stop()
        self.server_thread.join()

    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        # Signal that the server round-trip works, then park until tearDown()
        # releases us so the session stays open for the duration of the test.
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def process_push_event(self, request: PushEvent) -> EventResponse:
        # Same handshake as process_review_event, for push events.
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def run_data_service(self):
        # Run (in the server thread) the lookout-sdk test server, replaying a
        # push event between two fixed commits of this repository.
        server.run("push", "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6",
                   "5833b4ba94154cf1ed07f37c32928c7b4411b36b", self.port)

    def test_with_changed_uasts(self):
        # with_changed_uasts must deliver exactly one change with UASTs
        # attached but no file contents.
        def checker(imposter, ptr_from: ReferencePointer,
                    ptr_to: ReferencePointer, data_request_stub: DataStub,
                    **data):
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            base, head = changes[0].base, changes[0].head
            self.assertEqual(base.content, b"")
            self.assertEqual(head.content, b"")
            node_module = bblfsh.Node.__module__
            self.assertEqual(type(base.uast).__module__, node_module)
            self.assertEqual(type(head.uast).__module__, node_module)
            self.assertEqual(base.path, head.path)
            self.assertEqual(base.path, "lookout/core/manager.py")
            self.assertEqual(base.language, "Python")
            self.assertEqual(head.language, "Python")

        wrapped = with_changed_uasts(checker)
        wrapped(self,
                ReferencePointer(self.url, self.ref,
                                 "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6"),
                ReferencePointer(self.url, self.ref,
                                 "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
                self.data_service.get())

    def test_with_changed_uasts_and_contents(self):
        # Same as test_with_changed_uasts, but the file contents must be
        # populated as well (known byte sizes of the two revisions).
        def checker(imposter, ptr_from: ReferencePointer,
                    ptr_to: ReferencePointer, data_request_stub: DataStub,
                    **data):
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            base, head = changes[0].base, changes[0].head
            self.assertEqual(len(base.content), 5548)
            self.assertEqual(len(head.content), 5542)
            node_module = bblfsh.Node.__module__
            self.assertEqual(type(base.uast).__module__, node_module)
            self.assertEqual(type(head.uast).__module__, node_module)
            self.assertEqual(base.path, head.path)
            self.assertEqual(base.path, "lookout/core/manager.py")
            self.assertEqual(base.language, "Python")
            self.assertEqual(head.language, "Python")

        wrapped = with_changed_uasts_and_contents(checker)
        wrapped(self,
                ReferencePointer(self.url, self.ref,
                                 "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6"),
                ReferencePointer(self.url, self.ref,
                                 "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
                self.data_service.get())

    def test_with_uasts(self):
        # with_uasts must fetch all 61 files with UASTs but empty contents.
        known_languages = ("Python", "YAML", "Dockerfile", "Markdown",
                           "Jupyter Notebook", "Shell", "Text", "")

        def checker(imposter, ptr: ReferencePointer, config: dict,
                    data_request_stub: DataStub, **data):
            fetched = list(data["files"])
            self.assertEqual(len(fetched), 61)
            for fetched_file in fetched:
                self.assertEqual(fetched_file.content, b"")
                self.assertEqual(type(fetched_file.uast).__module__,
                                 bblfsh.Node.__module__)
                self.assertTrue(fetched_file.path)
                self.assertIn(fetched_file.language, known_languages)

        wrapped = with_uasts(checker)
        wrapped(self,
                ReferencePointer(self.url, self.ref,
                                 "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
                None, self.data_service.get())

    def test_with_uasts_and_contents(self):
        # with_uasts_and_contents must also deliver the file bytes; only
        # package markers (__init__.py) may legitimately be empty.
        known_languages = ("Python", "YAML", "Dockerfile", "Markdown",
                           "Jupyter Notebook", "Shell", "Text", "")

        def checker(imposter, ptr: ReferencePointer, config: dict,
                    data_request_stub: DataStub, **data):
            fetched = list(data["files"])
            self.assertEqual(len(fetched), 61)
            for fetched_file in fetched:
                if not fetched_file.path.endswith("__init__.py"):
                    self.assertGreater(len(fetched_file.content), 0,
                                       fetched_file.path)
                self.assertEqual(type(fetched_file.uast).__module__,
                                 bblfsh.Node.__module__)
                self.assertTrue(fetched_file.path)
                self.assertIn(fetched_file.language, known_languages)

        wrapped = with_uasts_and_contents(checker)
        wrapped(self,
                ReferencePointer(self.url, self.ref,
                                 "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
                None, self.data_service.get())