def run_analyzers(args: argparse.Namespace):
    """
    Launch the service with the specified analyzers. Blocks until a KeyboardInterrupt.

    :param args: Parsed command line arguments.
    :return: None
    """
    log = logging.getLogger("run")
    model_repository = create_model_repo_from_args(args)
    log.info("Created %s", model_repository)
    if args.request_server == "auto":
        data_request_address = "%s:10301" % args.server.split(":")[0]
    else:
        data_request_address = args.request_server
    data_service = DataService(data_request_address)
    log.info("Created %s", data_service)
    sys.path.append(os.getcwd())
    manager = AnalyzerManager(
        analyzers=[importlib.import_module(a).analyzer_class for a in args.analyzer],
        model_repository=model_repository,
        data_service=data_service,
    )
    sys.path = sys.path[:-1]
    log.info("Created %s", manager)
    listener = EventListener(address=args.server, handlers=manager, n_workers=args.workers)
    log.info("Created %s", listener)
    listener.start()
    log.info("Listening %s", args.server)
    listener.block()
    model_repository.shutdown()
    data_service.shutdown()
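# A minimal usage sketch (not from the original sources): building an argparse
# Namespace by hand and calling run_analyzers() directly. The concrete values are
# illustrative; the model repository fields mirror what create_model_repo_from_args()
# consumes in the other snippets here (db, fs, cache_size, cache_ttl, db_kwargs).
args = argparse.Namespace(
    analyzer=["lookout.style.format"],   # dotted module paths exposing `analyzer_class`
    server="0.0.0.0:9930",               # address for the EventListener to bind
    request_server="auto",               # reuse the listener host for the DataService
    workers=1,
    db="sqlite:///lookout.sqlite",       # note: the SQLAlchemy scheme is part of `db`
    fs="/tmp/lookout-models",
    cache_size="1G", cache_ttl="6h", db_kwargs={},
)
run_analyzers(args)  # blocks until KeyboardInterrupt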
class AnalyzerContextManager:
    """Context manager for launching an analyzer."""

    def __init__(self, analyzer: Type[Analyzer], port: int, db: str, fs: str,
                 init: bool = True, data_request_address: str = "localhost:10301"):
        """
        Init the analyzer: model_repository, data_service, arguments, etc.

        :param port: Port to use for the analyzer.
        :param db: Path to the SQLite database location.
        :param fs: Location where to store the results of the launched analyzer.
        :param analyzer: Analyzer class to use.
        :param init: Whether to run `analyzer init`. \
                     Set to False if you want to reuse an existing database.
        :param data_request_address: DataService gRPC endpoint to use.
        """
        self.analyzer = analyzer
        self.port = port
        self.init = init
        self.data_request_address = data_request_address
        self._sql_alchemy_model_args = Namespace(
            db="sqlite:///%s" % db, fs=fs, cache_size="1G", cache_ttl="6h", db_kwargs={},
        )

    def __enter__(self) -> "AnalyzerContextManager":
        self.model_repository = create_model_repo_from_args(self._sql_alchemy_model_args)
        if self.init:
            self.model_repository.init()
        self.data_service = DataService(self.data_request_address)
        self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                       model_repository=self.model_repository,
                                       data_service=self.data_service)
        self.listener = EventListener(address="0.0.0.0:%d" % self.port,
                                      handlers=self.manager, n_workers=1)
        self.listener.start()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        self.listener.stop()
        self.model_repository.shutdown()
        self.data_service.shutdown()
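# A minimal sketch of driving the context manager above from a test; `MyAnalyzer`
# is a hypothetical Analyzer subclass, and the paths are placeholders.
with AnalyzerContextManager(analyzer=MyAnalyzer, port=9930,
                            db="/tmp/lookout.sqlite", fs="/tmp/lookout-models") as context:
    # The EventListener now serves on 0.0.0.0:9930; review/push events can be sent
    # to it (e.g. with the lookout-sdk binary) while the `with` block is open.
    pass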
def __init__(self, port: int, db: str, fs: str, config: str = "",
             analyzer: Union[str, Sequence[str],
                             Iterable[Type[Analyzer]]] = "lookout.style.format",
             init: bool = True) -> None:
    """
    Init the analyzer: model_repository, data_service, arguments, etc.

    :param port: Port to use for the analyzer.
    :param db: Database location.
    :param fs: Location where to store the results of the launched analyzer.
    :param config: Path to the configuration file with option defaults. If empty, skipped.
    :param analyzer: Analyzer(s) to use.
    :param init: Whether to run `analyzer init`. \
                 Set to False if you want to reuse an existing database.
    """
    self.port = port
    self.db = db
    self.fs = fs
    self.config_path = config  # mimic TestAnalyzer - not used so far
    # normalize `analyzer` to a list of Analyzer classes
    if isinstance(analyzer, (str, type)):
        analyzer = [analyzer]
    self.analyzer = list(analyzer)
    if isinstance(self.analyzer[0], str):
        self.analyzer = [importlib.import_module(a).analyzer_class for a in self.analyzer]
    self.args = Namespace()
    self.args.db = "sqlite:///%s" % self.db
    self.args.fs = self.fs
    self.args.cache_size = "1G"
    self.args.cache_ttl = "6h"
    self.args.db_kwargs = {}
    self.args.workers = 1
    # initialize the model repository
    self.model_repository = create_model_repo_from_args(self.args)
    if init:
        self.model_repository.init()
    # initialize a new instance of DataService
    data_request_address = "0.0.0.0:10301"
    self.data_service = DataService(data_request_address)
def generate_file_fixes(self, data_service: DataService, changes: Sequence[Change],
                        ) -> Iterator[FileFix]:
    """
    Generate all data required for any type of further processing.

    That further processing can be comment generation or performance report generation.

    :param data_service: Connection to the Lookout data retrieval service.
    :param changes: The list of changes in the pointed state.
    :return: Iterator with unrendered data per comment.
    """
    log = self._log
    base_files_by_lang = files_by_language(c.base for c in changes)
    head_files_by_lang = files_by_language(c.head for c in changes)
    processed_files_counter = defaultdict(int)
    processed_fixes_counter = defaultdict(int)
    for lang, head_files in head_files_by_lang.items():
        if lang not in self.model:
            log.warning("skipped %d written in %s. Rules for %s do not exist in model",
                        len(head_files), lang, lang)
            continue
        rules = self.model[lang]
        config = self.analyze_config[lang]
        rules = rules.filter_by_confidence(config["confidence_threshold"]) \
            .filter_by_support(config["support_threshold"])
        for file in filter_files(head_files, rules.origin_config["line_length_limit"],
                                 rules.origin_config["overall_size_limit"], log=log):
            processed_files_counter[lang] += 1
            try:
                prev_file = base_files_by_lang[lang][file.path]
            except KeyError:
                prev_file = None
                lines = None
            else:
                lines = sorted(chain.from_iterable((
                    find_new_lines(prev_file, file),
                    find_deleted_lines(prev_file, file),
                )))
            log.debug("%s %s", file.path, lines)
            fe = FeatureExtractor(language=lang, **rules.origin_config["feature_extractor"])
            feature_extractor_output = fe.extract_features([file], [lines])
            if feature_extractor_output is None:
                submit_event("%s.analyze.%s.parse_failures" % (self.name, lang), 1)
                if config["report_parse_failures"]:
                    log.warning("Failed to parse %s", file.path)
                    yield FileFix(error="Failed to parse", head_file=file, language=lang,
                                  feature_extractor=fe, base_file=prev_file, file_vnodes=[],
                                  line_fixes=[], y_pred_pure=None, y=None)
            else:
                fixes, file_vnodes, y_pred_pure, y = self._generate_token_fixes(
                    file, fe, feature_extractor_output, data_service.get_bblfsh(), rules)
                log.debug("%s %d fixes", file.path, len(fixes))
                processed_fixes_counter[lang] += len(fixes)
                yield FileFix(error="", head_file=file, language=lang, feature_extractor=fe,
                              base_file=prev_file, file_vnodes=file_vnodes, line_fixes=fixes,
                              y_pred_pure=y_pred_pure, y=y)
    for key, val in processed_files_counter.items():
        submit_event("%s.analyze.%s.files" % (self.name, key), val)
    for key, val in processed_fixes_counter.items():
        submit_event("%s.analyze.%s.fixes" % (self.name, key), val)
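# An illustrative consumer of generate_file_fixes(); `analyzer`, `data_service` and
# `changes` are assumed to be prepared as in the surrounding snippets.
for file_fix in analyzer.generate_file_fixes(data_service, changes):
    if file_fix.error:
        print("skipped %s: %s" % (file_fix.head_file.path, file_fix.error))
        continue
    for line_fix in file_fix.line_fixes:
        print(file_fix.head_file.path, line_fix)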
def run(self, ptr_from: ReferencePointer, data_service_head: DataService,
        data_service_base: Optional[DataService] = None) -> Iterable[FileFix]:
    """
    Run `generate_file_fixes` for all files in the ptr_from revision.

    :param ptr_from: Git repository state pointer to the base revision.
    :param data_service_head: Connection to the Lookout data retrieval service to get \
                              the new files.
    :param data_service_base: Connection to the Lookout data retrieval service to get \
                              the initial files. If it is None, we assume empty contents.
    :return: Generator of fixes for each file.
    """
    files_head = list(request_files(data_service_head.get_data(), ptr_from,
                                    contents=True, uast=True, unicode=True))
    if data_service_base is not None:
        files_base = list(request_files(data_service_base.get_data(), ptr_from,
                                        contents=True, uast=True, unicode=True))
    else:
        files_base = [File(path=f.path) for f in files_head]
    return self.generate_file_fixes(
        data_service_head,
        [self.Changes(f1, f2) for f1, f2 in zip(files_base, files_head)])
def run(self, ptr: ReferencePointer, data_service: DataService) -> Iterable[TypoFix]:
    """
    Run `generate_typos_fixes` for all lines and all files in the `ptr` revision.

    :param ptr: Git repository state pointer to the revision that should be analyzed.
    :param data_service: Connection to the Lookout data retrieval service to get the files.
    :return: Generator of fixes for each file.
    """
    for file in request_files(data_service.get_data(), ptr, contents=True, uast=True,
                              unicode=False):
        if file.path == self.config["analyze"]["filepath"]:
            break
    else:
        raise ValueError("No such file %s in %s" % (self.config["analyze"]["filepath"], ptr))
    line = self.config["analyze"]["line"] + 1
    try:
        self._find_new_lines_return_value = [line]
        typos_fixes = list(self.generate_typos_fixes([Change(head=file, base=file)]))
        line_identifiers = self._get_identifiers(file.uast, [line])
        line_identifiers = [n.token for n in line_identifiers]
        identifiers_number = len(set(line_identifiers))
        if not identifiers_number:
            raise ValueError("No identifiers for %s:%d in %s" % (
                self.config["analyze"]["filepath"], line, ptr))
        assert self.config["analyze"]["wrong_id"] in line_identifiers, \
            "Identifier %s was not found in %s:%d.\nLine identifiers are %s" % (
                self.config["analyze"]["wrong_id"], self.config["analyze"]["filepath"],
                line, line_identifiers)
        if typos_fixes:
            return typos_fixes
        return [TypoFix(content=file.content.decode("utf-8", "replace"), path=file.path,
                        line_number=0, identifier="", candidates=[],
                        identifiers_number=identifiers_number)]
    finally:
        self._find_new_lines_return_value = None
def run(self, ptr: ReferencePointer, data_service: DataService) -> Iterable[TypoFix]:
    """
    Run `generate_typos_fixes` for all lines and all files in the `ptr` revision.

    :param ptr: Git repository state pointer to the revision that should be analyzed.
    :param data_service: Connection to the Lookout data retrieval service to get the files.
    :return: Generator of fixes for each file.
    """
    for file in request_files(data_service.get_data(), ptr, contents=True, uast=True,
                              unicode=False):
        if file.path == self.config["filepath_to_analyze"]:
            break
    else:
        raise ValueError("No such file %s in %s" % (self.config["filepath_to_analyze"], ptr))
    typos_fixes = list(self.generate_typos_fixes([
        UnicodeChange(head=file, base=File(path=file.path, language=file.language)),
    ]))
    if typos_fixes:
        return typos_fixes
    identifiers_number = len(self._get_identifiers(file.uast, []))
    if not identifiers_number:
        raise ValueError("No identifiers for file %s in %s" % (
            self.config["filepath_to_analyze"], ptr))
    return [TypoFix(content=file.content.decode("utf-8", "replace"), path=file.path,
                    line_number=0, identifier="", candidates=[],
                    identifiers_number=identifiers_number)]
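# A hedged sketch of driving the run() variant above: the analyzer is assumed to have
# been constructed with a config containing "filepath_to_analyze"; the repository URL
# and commit hash are placeholders.
analyzer.config = {"filepath_to_analyze": "lookout/core/manager.py"}
ptr = ReferencePointer("file:///path/to/repo", "refs/heads/master",
                       "0000000000000000000000000000000000000000")
for typo_fix in analyzer.run(ptr, data_service):
    print(typo_fix.path, typo_fix.line_number, typo_fix.identifier, typo_fix.candidates)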
class DataRequestsTests(unittest.TestCase, EventHandlers):
    COMMIT_FROM = "3ac2a59275902f7252404d26680e30cc41efb837"
    COMMIT_TO = "dce7fcba3d2151a0d5dc4b3a89cfc0911c96cf2b"

    def setUp(self):
        self.setUpEvent = threading.Event()
        self.tearDownEvent = threading.Event()
        self.port = find_port()
        self.lookout_sdk = LookoutSDK()
        self.listener = EventListener("localhost:%d" % self.port, self).start()
        self.server_thread = threading.Thread(target=self.run_data_service)
        self.server_thread.start()
        self.data_service = DataService("localhost:10301")
        self.url = "file://" + str(Path(lookout.core.__file__).parent.parent.absolute())
        self.ref = "refs/heads/master"
        self.setUpWasSuccessful = True
        self.setUpEvent.wait()
        if not self.setUpWasSuccessful:
            self.fail("failed to setUp()")

    def tearDown(self):
        self.data_service.shutdown()
        self.tearDownEvent.set()
        self.listener.stop()
        self.server_thread.join()

    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def process_push_event(self, request: PushEvent) -> EventResponse:
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def run_data_service(self):
        try:
            self.lookout_sdk.push(self.COMMIT_FROM, self.COMMIT_TO, self.port,
                                  git_dir=os.getenv("LOOKOUT_SDK_ML_TESTS_GIT_DIR", "."))
        except Exception as e:
            print(type(e).__name__, e)
            self.setUpWasSuccessful = False
            self.setUpEvent.set()

    def test_with_changed_uasts(self):
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, b"")
            self.assertEqual(change.head.content, b"")
            self.assertEqual(type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_rpc_error(self):
        called = False

        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            def wrapped():
                f()
                self.assertIsNotNone(self.data_service._data_request_local.channel)
                raise grpc.RpcError()
            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)
        func = with_changed_uasts(unicode=False)(func)
        self.assertRaises(grpc.RpcError, func, self,
                          ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
                          ReferencePointer(self.url, self.ref, self.COMMIT_TO),
                          self.data_service)
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_changed_contents(self):
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertFalse(change.base.uast.children)
            self.assertFalse(change.head.uast.children)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_changed_uasts_and_contents(self):
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertEqual(type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts_and_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertEqual(file.content, b"")
                self.assertEqual(type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, ("Python", "YAML", "Dockerfile", "Markdown",
                                              "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_with_uasts_rpc_error(self):
        called = False

        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            nonlocal called
            called = True

        def fail(f):
            def wrapped():
                f()
                self.assertIsNotNone(self.data_service._data_request_local.channel)
                raise grpc.RpcError()
            return wrapped

        self.data_service._get_channel = fail(self.data_service._get_channel)
        func = with_uasts(unicode=False)(func)
        self.assertRaises(grpc.RpcError, func, self,
                          ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
                          self.data_service)
        self.assertFalse(called)
        self.assertIsNone(self.data_service._data_request_local.channel)

    def test_with_contents(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            non_empty_langs = 0
            for file in files:
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertFalse(file.uast.children)
                self.assertTrue(file.path)
                if file.language:
                    non_empty_langs += 1
                self.assertIn(file.language, ("Python", "YAML", "Dockerfile", "Markdown",
                                              "Jupyter Notebook", "Shell", "Text", ""))
            self.assertGreater(non_empty_langs, 0)

        func = with_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_with_uasts_and_contents(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertEqual(type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, ("Python", "YAML", "Dockerfile", "Markdown",
                                              "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts_and_contents(unicode=False)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_babelfish(self):
        uast, errors = parse_uast(self.data_service.get_bblfsh(), "console.log('hi');",
                                  "hi.js", unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertEqual(len(errors), 0, str(errors))

    def test_check_bblfsh_driver_versions(self):
        self.assertRaises(UnsatisfiedDriverVersionError,
                          self.data_service.check_bblfsh_driver_versions,
                          ["brainfuck>=1.0"])
        self.assertRaises(UnsatisfiedDriverVersionError,
                          self.data_service.check_bblfsh_driver_versions,
                          ["javascript<1.0"])
        self.data_service.check_bblfsh_driver_versions(["javascript>=1.3.0,<10.0"])

    def test_with_changed_uasts_unicode(self):
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, "")
            self.assertEqual(change.head.content, "")
            self.assertEqual(type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(unicode=True)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_FROM),
             ReferencePointer(self.url, self.ref, self.COMMIT_TO),
             self.data_service)

    def test_with_uasts_unicode(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_service: DataService, **data):
            self.assertIsInstance(data_service, DataService)
            files = list(data["files"])
            self.assertEqual(len(files), 18)
            for file in files:
                self.assertIsInstance(file, UnicodeFile)
                self.assertEqual(file.content, "")
                self.assertEqual(type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, ("Python", "YAML", "Dockerfile", "Markdown",
                                              "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(unicode=True)(func)
        func(self, ReferencePointer(self.url, self.ref, self.COMMIT_TO), None,
             self.data_service)

    def test_babelfish_unicode(self):
        content = b"console.log('\xc3\x80');"
        uast_uni, errors_uni = parse_uast(self.data_service.get_bblfsh(), content.decode(),
                                          "test.js", unicode=True)
        uast, errors = parse_uast(self.data_service.get_bblfsh(), content.decode(),
                                  "test.js", unicode=False)
        self.assertIsInstance(uast, bblfsh.Node)
        self.assertIsInstance(uast_uni, bblfsh.Node)
        self.assertEqual(errors_uni, errors)
        check_uast_transformation(self, content, uast, uast_uni)
class AnalyzerContextManager:
    """Context manager for launching an analyzer."""

    def __init__(self, analyzer: Type[Analyzer], db: str, fs: str, init: bool = True,
                 data_request_address: str = "localhost:10301"):
        """
        Initialization.

        :param db: Path to an SQLite database with model metadata.
        :param fs: Location where to store the trained model.
        :param analyzer: Analyzer class to use.
        :param init: Value indicating whether to run the destructive database \
                     initialization or not. Set to False if you want to reuse an \
                     existing database.
        :param data_request_address: DataService gRPC endpoint to use.
        """
        self.analyzer = analyzer
        self.init = init
        self._port = find_port()
        self.data_request_address = data_request_address
        self._sql_alchemy_model_args = Namespace(
            db="sqlite:///%s" % db, fs=fs, cache_size="1G", cache_ttl="6h", db_kwargs={},
        )
        self._lookout_sdk = None

    def __enter__(self) -> "AnalyzerContextManager":
        """Create the context and run the events listener."""
        self.model_repository = create_model_repo_from_args(self._sql_alchemy_model_args)
        if self.init:
            self.model_repository.init()
        self.data_service = DataService(self.data_request_address)
        self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                       model_repository=self.model_repository,
                                       data_service=self.data_service)
        if not check_port_free(self._port):
            self._port = find_port()
        self.listener = EventListener(address="0.0.0.0:%d" % self._port,
                                      handlers=self.manager, n_workers=1)
        self.listener.start()
        self._lookout_sdk = LookoutSDK()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """Stop the events listener and shut down the context."""
        self._lookout_sdk = None
        self.listener.stop()
        self.model_repository.shutdown()
        self.data_service.shutdown()

    def review(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str] = None,
               log_level: Optional[str] = None, config_json: Optional[dict] = None,
               ) -> subprocess.CompletedProcess:
        """
        Proxy for LookoutSDK.review().

        Triggers a review event and effectively calls the underlying analyzer's
        `analyze()`. See `LookoutSDK.review()` for the parameters description.
        """
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.review() is available only inside a `with` statement")
        return self._lookout_sdk.review(fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
                                        log_level=log_level, config_json=config_json)

    def push(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str] = None,
             log_level: Optional[str] = None, config_json: Optional[dict] = None,
             ) -> subprocess.CompletedProcess:
        """
        Proxy for LookoutSDK.push().

        Triggers a push event and effectively calls the underlying analyzer's
        `train()`. See `LookoutSDK.push()` for the parameters description.
        """
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.push() is available only inside a `with` statement")
        return self._lookout_sdk.push(fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
                                      log_level=log_level, config_json=config_json)
class AnalyzerContextManager:
    """Context manager for launching an analyzer."""

    def __init__(self, analyzer: Type[Analyzer], db: str, fs: str, init: bool = True,
                 data_request_address: str = "localhost:10301"):
        """
        Initialization.

        :param db: Path to an SQLite database with model metadata.
        :param fs: Location where to store the trained model.
        :param analyzer: Analyzer class to use.
        :param init: Value indicating whether to run the destructive database \
                     initialization or not. Set to False if you want to reuse an \
                     existing database.
        :param data_request_address: DataService gRPC endpoint to use.
        """
        self.analyzer = analyzer
        self.init = init
        self._port = find_port()
        self.data_request_address = data_request_address
        self._sql_alchemy_model_args = Namespace(
            db="sqlite:///%s" % db, fs=fs, cache_size="1G", cache_ttl="6h", db_kwargs={},
        )
        self._lookout_sdk = None

    def __enter__(self) -> "AnalyzerContextManager":
        """Create the context and run the events listener."""
        self.model_repository = create_model_repo_from_args(self._sql_alchemy_model_args)
        if self.init:
            self.model_repository.init()
        self.data_service = DataService(self.data_request_address)
        self.manager = AnalyzerManager(analyzers=[self.analyzer],
                                       model_repository=self.model_repository,
                                       data_service=self.data_service)
        if not check_port_free(self._port):
            self._port = find_port()
        self.listener = EventListener(address="0.0.0.0:%d" % self._port,
                                      handlers=self.manager, n_workers=1)
        self.listener.start()
        self._lookout_sdk = LookoutSDK()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """Stop the events listener and shut down the context."""
        self._lookout_sdk = None
        self.listener.stop()
        self.model_repository.shutdown()
        self.data_service.shutdown()

    def review(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str] = None,
               log_level: Optional[str] = None, config_json: Optional[dict] = None,
               ) -> Iterator[Comment]:
        """
        Proxy for LookoutSDK.review().

        Triggers a review event and effectively calls the underlying analyzer's
        `analyze()`. See `LookoutSDK.review()` for the parameters description.

        :return: Iterator over the comments generated by the triggered analyzer. \
                 Comment confidence is not provided because of lookout-sdk limitations.
        """
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.review() is available only inside a `with` statement")
        process = self._lookout_sdk.review(fr, to, self._port, git_dir=git_dir,
                                           bblfsh=bblfsh, log_level=log_level,
                                           config_json=config_json)

        def comments_iterator(logs):
            # TODO (zurk): Use stdout and remove the ifs when the lookout issue is solved:
            # https://github.com/src-d/lookout/issues/601
            for log_line in logs.splitlines():
                log_entry = json.loads(log_line.decode())
                if log_entry["msg"] == "line comment":
                    yield Comment(file=log_entry["file"], text=log_entry["text"],
                                  line=log_entry["line"])
                if log_entry["msg"] == "file comment":
                    yield Comment(file=log_entry["file"], text=log_entry["text"])
                if log_entry["msg"] == "global comment":
                    yield Comment(text=log_entry["text"])

        return comments_iterator(process.stderr)

    def push(self, fr: str, to: str, *, git_dir: str, bblfsh: Optional[str] = None,
             log_level: Optional[str] = None, config_json: Optional[dict] = None,
             ) -> subprocess.CompletedProcess:
        """
        Proxy for LookoutSDK.push().

        Triggers a push event and effectively calls the underlying analyzer's
        `train()`. See `LookoutSDK.push()` for the parameters description.
        """
        if not self._lookout_sdk:
            raise AttributeError(
                "AnalyzerContextManager.push() is available only inside a `with` statement")
        return self._lookout_sdk.push(fr, to, self._port, git_dir=git_dir, bblfsh=bblfsh,
                                      log_level=log_level, config_json=config_json)
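# A sketch of exercising the review()/push() proxies above end to end; the revisions
# and git_dir are placeholders, and FormatAnalyzer stands in for any Analyzer subclass.
with AnalyzerContextManager(FormatAnalyzer, db="/tmp/lookout.sqlite",
                            fs="/tmp/lookout-models") as context:
    context.push("HEAD~1", "HEAD", git_dir="/path/to/repo")  # triggers train()
    for comment in context.review("HEAD~1", "HEAD", git_dir="/path/to/repo"):
        print(comment.file, comment.line, comment.text)      # triggers analyze()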
class DataRequestsTests(unittest.TestCase, EventHandlers):
    def setUp(self):
        self.setUpEvent = threading.Event()
        self.tearDownEvent = threading.Event()
        self.port = server.find_port()
        self.listener = EventListener("localhost:%d" % self.port, self).start()
        self.server_thread = threading.Thread(target=self.run_data_service)
        self.server_thread.start()
        self.data_service = DataService("localhost:10301")
        self.url = "file://" + str(Path(lookout.__file__).parent.absolute())
        self.ref = "refs/heads/master"
        self.setUpEvent.wait()

    def tearDown(self):
        self.data_service.shutdown()
        self.tearDownEvent.set()
        self.listener.stop()
        self.server_thread.join()

    def process_review_event(self, request: ReviewEvent) -> EventResponse:
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def process_push_event(self, request: PushEvent) -> EventResponse:
        self.setUpEvent.set()
        self.tearDownEvent.wait()
        return EventResponse()

    def run_data_service(self):
        server.run("push", "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6",
                   "5833b4ba94154cf1ed07f37c32928c7b4411b36b", self.port)

    def test_with_changed_uasts(self):
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_request_stub: DataStub, **data):
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(change.base.content, b"")
            self.assertEqual(change.head.content, b"")
            self.assertEqual(type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts(func)
        func(self,
             ReferencePointer(self.url, self.ref, "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6"),
             ReferencePointer(self.url, self.ref, "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
             self.data_service.get())

    def test_with_changed_uasts_and_contents(self):
        def func(imposter, ptr_from: ReferencePointer, ptr_to: ReferencePointer,
                 data_request_stub: DataStub, **data):
            changes = list(data["changes"])
            self.assertEqual(len(changes), 1)
            change = changes[0]
            self.assertEqual(len(change.base.content), 5548)
            self.assertEqual(len(change.head.content), 5542)
            self.assertEqual(type(change.base.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(type(change.head.uast).__module__, bblfsh.Node.__module__)
            self.assertEqual(change.base.path, change.head.path)
            self.assertEqual(change.base.path, "lookout/core/manager.py")
            self.assertEqual(change.base.language, "Python")
            self.assertEqual(change.head.language, "Python")

        func = with_changed_uasts_and_contents(func)
        func(self,
             ReferencePointer(self.url, self.ref, "4984b98b0e2375e9372fbab4eb4c9cd8f0c289c6"),
             ReferencePointer(self.url, self.ref, "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
             self.data_service.get())

    def test_with_uasts(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_request_stub: DataStub, **data):
            files = list(data["files"])
            self.assertEqual(len(files), 61)
            for file in files:
                self.assertEqual(file.content, b"")
                self.assertEqual(type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, ("Python", "YAML", "Dockerfile", "Markdown",
                                              "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts(func)
        func(self,
             ReferencePointer(self.url, self.ref, "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
             None, self.data_service.get())

    def test_with_uasts_and_contents(self):
        def func(imposter, ptr: ReferencePointer, config: dict,
                 data_request_stub: DataStub, **data):
            files = list(data["files"])
            self.assertEqual(len(files), 61)
            for file in files:
                if not file.path.endswith("__init__.py"):
                    self.assertGreater(len(file.content), 0, file.path)
                self.assertEqual(type(file.uast).__module__, bblfsh.Node.__module__)
                self.assertTrue(file.path)
                self.assertIn(file.language, ("Python", "YAML", "Dockerfile", "Markdown",
                                              "Jupyter Notebook", "Shell", "Text", ""))

        func = with_uasts_and_contents(func)
        func(self,
             ReferencePointer(self.url, self.ref, "5833b4ba94154cf1ed07f37c32928c7b4411b36b"),
             None, self.data_service.get())