Code example #1
 def test_quality_report_noisy(self):
     slogging.setup("DEBUG", False)
     with Capturing() as output:
         try:
             quality_report_noisy(bblfsh=self.bblfsh,
                                  language=self.language,
                                  confidence_threshold=0.8,
                                  support_threshold=20,
                                  precision_threshold=0.95,
                                  dir_output=tempfile.tempdir,
                                  repos=REPOSITORIES)
         except SystemExit:
             self.skipTest("Matplotlib is required to run this test")
     pattern = re.compile(
         r"((?:prediction rate x)|(?:precision y)): \[(\d+\.\d+(?:, \d+\.\d+)+)\]"
     )
     metrics = {}
     for line in output:
         match = pattern.search(line)
         if match:
             metric, scores_string = match.groups()[:2]
             scores = [float(score) for score in scores_string.split(", ")]
             metrics[metric] = scores
     self.assertGreater(metrics["prediction rate x"][-1], 0)
     self.assertGreater(metrics["precision y"][-1], 0)
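For reference, the pattern compiled above is meant to match report lines of the following shape; the numbers here are made-up placeholders, not actual output of quality_report_noisy:

    prediction rate x: [0.10, 0.25, 0.50]
    precision y: [0.97, 0.96, 0.95]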
Code example #2
 def test_config(self):
     slogging.setup("INFO", True, "XXX.yml")
     with tempfile.NamedTemporaryFile() as f:
         f.write(
             b"FormatAnalyzer: INFO\nRules: INFO\nTrainableRules: INFO\n")
         f.flush()
         slogging.setup("INFO", True, f.name)
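Judging by this test, the third argument of slogging.setup is a path to a YAML file mapping logger names to levels. A hypothetical config tuning two loggers independently could look like this (the logger names are taken from the test; the levels are arbitrary):

    FormatAnalyzer: WARNING
    Rules: DEBUG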
Code example #3
def run_analyzers(args):
    """
    Launches the service with the specified analyzers. Blocks until a KeyboardInterrupt.

    :param args: Parsed command line arguments.
    :return: None
    """
    slogging.setup(args.log_level, args.log_structured, args.log_config_path)
    log = logging.getLogger("run")
    model_repository = create_model_repo_from_args(args)
    log.info("Created %s", model_repository)
    if args.request_server == "auto":
        data_request_address = "%s:10301" % args.server.split(":")[0]
    else:
        data_request_address = args.request_server
    data_service = DataService(data_request_address)
    log.info("Created %s", data_service)
    manager = AnalyzerManager(
        analyzers=[
            importlib.import_module(a).analyzer_class for a in args.analyzer
        ],
        model_repository=model_repository,
        data_service=data_service,
    )
    log.info("Created %s", manager)
    listener = EventListener(address=args.server,
                             handlers=manager,
                             n_workers=args.workers)
    log.info("Created %s", listener)
    listener.start()
    log.info("Listening %s", args.server)
    listener.block()
    model_repository.shutdown()
    data_service.shutdown()
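run_analyzers only reads attributes off args, so any argparse setup that provides them will do. A minimal sketch of such a parser follows; the flag spellings, defaults, and help texts are assumptions for illustration, not the real lookout-sdk-ml CLI, and the flags consumed by create_model_repo_from_args are omitted:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--log-level", default="INFO")
    parser.add_argument("--log-structured", action="store_true")
    parser.add_argument("--log-config-path", default=None)
    parser.add_argument("--server", default="0.0.0.0:2000",  # assumed default
                        help="host:port to listen for Lookout events on")
    parser.add_argument("--request-server", default="auto",
                        help="Data service address, or 'auto' to derive it from --server.")
    parser.add_argument("--workers", type=int, default=1)
    parser.add_argument("--analyzer", nargs="+",
                        help="Fully qualified module names, each exposing analyzer_class.")
    run_analyzers(parser.parse_args())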
Code example #4
def print_reports(input_pattern: str, bblfsh: str, language: str, model_path: str,
                  config: Union[str, dict] = "{}", log_level: str = "INFO") -> None:
    """Print quality and model reports for a given model on a given dataset."""
    slogging.setup(log_level, False)
    log = logging.getLogger("quality_report")
    config = config if isinstance(config, dict) else json.loads(config)
    for report in analyze_files(
            QualityReportAnalyzer, config, model_path, language, bblfsh, input_pattern, log):
        print(report.text)
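A hypothetical call; the glob pattern, Babelfish endpoint, and model path are placeholders:

    print_reports(input_pattern="project/**/*.js",
                  bblfsh="0.0.0.0:9432",
                  language="javascript",
                  model_path="format.model",
                  log_level="DEBUG")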
Code example #5
def init_repo(args):
    """
    Initializes the model repository.

    :param args: Parsed command line arguments.
    :return: None
    """
    slogging.setup(args.log_level, False, args.log_config_path)
    repo = create_model_repo_from_args(args)
    repo.init()
Code example #6
File: package.py  Project: EgorBu/lookout-sdk-ml
def package_cmdline_entry(
        args: argparse.Namespace) -> Union[None, int]:  # noqa: D401
    """
    Package several analyzers to a Docker container and write a sample Docker Compose config \
    for Lookout.

    :param args: Parsed command line arguments.
    :return: None or error code.
    """
    slogging.setup(args.log_level, False, args.log_config_path)
    return package(args.yes, args.no, args.workdir, args.analyzer,
                   args.requirements, args.repo, args.user, args.token)
Code example #7
File: train.py  Project: suhaibmujahid/style-analyzer
def main():
    setup("DEBUG", False)
    parser = ArgumentParser()
    parser.add_argument("training_dir",
                        help="Path to the directory containing the files to train from.")
    parser.add_argument("output_path", help="Path to the model to write.")
    parser.add_argument("--bblfsh", default="0.0.0.0:9432", help="Address of babelfish server.")
    parser.add_argument("--language", default="javascript", help="Language to filter on.")
    parser.add_argument("--config",
                        help="Path to a YAML file containing config to apply during training.")
    args = parser.parse_args()
    train(**vars(args))
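With the parser above, a training run could be started like this; both paths are placeholders, and the two flags merely restate their declared defaults:

    python train.py ./training_files ./format.model --bblfsh 0.0.0.0:9432 --language javascript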
Code example #8
File: __main__.py  Project: tsolakoua/style-analyzer
def main():
    args = parse_args()
    slogging.setup("INFO", False)
    clients = threading.local()
    pool = ThreadPoolExecutor(max_workers=args.threads)
    log = logging.getLogger("main")
    log.info("Will parse %d files", len(args.input))
    roles = set()
    reserved = set()
    language = ""
    progress = tqdm(total=len(args.input))
    errors = False

    def analyze_file(path: str):
        nonlocal errors
        if errors:
            return
        try:
            try:
                client = clients.client
            except AttributeError:
                client = bblfsh.BblfshClient(args.bblfsh)
                clients.client = client
            response = client.parse(path)
            nonlocal language
            if not language:
                language = response.language
            elif language != response.language:
                log.warning("dropped %s - language mismatch %s != %s", path,
                            language, response.language)
                return
            analyze_uast(path, response.uast, roles, reserved)
            progress.update(1)
        except:  # noqa: E722
            log.exception("Parsing %s", path)
            errors = True

    with progress:
        for file in args.input:
            pool.submit(analyze_file, file)
        pool.shutdown()
    if errors:
        return 1
    reserved.discard("")
    log.info("Internal roles: %d", len(roles))
    log.info("Reserved: %d", len(reserved))
    generate_files(args.output, roles, reserved)
Code example #9
 def test_structured_logging(self):
     logging.basicConfig()
     handler_backup = logging.getLogger().handlers[0]
     slogging.setup("INFO", True, "logging.yml")
     backup = sys.stdout
     sys.stdout = buffer = io.StringIO()
     try:
         logging.getLogger("test").info("hello, world!")
     finally:
         sys.stdout = backup
     logging.getLogger().handlers[0] = handler_backup
     obj = json.loads(buffer.getvalue())
     self.assertEqual(obj["level"], "info")
     self.assertEqual(obj["msg"], "hello, world!")
     self.assertEqual(obj["source"], "test_slogging.py:18")
     self.assertEqual(len(obj["thread"]), 4)
     self.assertIn("time", obj)
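Taken together, the assertions imply that the structured handler writes one JSON object per log record to stdout, roughly of this shape (the thread id and timestamp below are invented; the test only checks that "thread" has length 4 and that "time" is present):

    {"level": "info", "msg": "hello, world!", "source": "test_slogging.py:18", "thread": "ab12", "time": "2019-01-01T00:00:00Z"}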
Code example #10
File: __main__.py  Project: zurk/style-analyzer
def main():
    parser = create_parser()
    args = parser.parse_args()
    slogging.setup(args.log_level, False)
    return create_and_train_nn_prediction_from_file(**vars(args))
Code example #11
def main():
    """Entry point."""
    args = parse_args()
    slogging.setup(args.log_level, False)
    clients = threading.local()
    pool = ThreadPoolExecutor(max_workers=args.threads)
    log = logging.getLogger("main")
    log.info("Will parse %d files in %d threads", len(args.input),
             args.threads)
    internal_types = defaultdict(int)
    roles = defaultdict(int)
    reserved = set()
    language = args.parquet_language
    inputs = list(handle_input_arg(args.input))
    progress = tqdm(total=len(inputs))
    progress_lock = threading.Lock()
    errors = False

    def analyze_code_file(path: str):
        nonlocal errors
        if errors:
            return
        try:
            try:
                client = clients.client
            except AttributeError:
                client = bblfsh.BblfshClient(args.bblfsh)
                clients.client = client
            response = client.parse(path)
            nonlocal language
            if not language:
                language = response.language
            elif language != response.language:
                log.warning("dropped %s - language mismatch %s != %s", path,
                            language, response.language)
                return
            content = Path(path).read_text()
            analyze_uast(path, content, response.uast, internal_types, roles,
                         reserved)
        except:  # noqa: E722
            log.exception("Parsing %s", path)
            errors = True
        finally:
            with progress_lock:
                progress.disable = False  # this is needed, do not remove
                progress.update(1)

    def analyze_parquet_row(row: pandas.Series, filepath: str):
        nonlocal errors
        if errors:
            return
        nonlocal language
        try:
            path = "%s:%s" % (filepath, row.path)
            analyze_uast(path, row.content.decode(errors="ignore"),
                         bblfsh.Node.FromString(row.uast), internal_types,
                         roles, reserved)
        except DecodeError as e:
            log.warning(e)
        except:  # noqa: E722
            log.exception("Parsing %s", row.path)
            errors = True
        finally:
            with progress_lock:
                progress.disable = False  # this is needed, do not remove
                progress.update(1)

    try:
        if args.parquet:
            if not language:
                raise ValueError(
                    "--parquet-language must be specified with --parquet.")
            with progress:
                for filepath in inputs:
                    try:
                        data = pandas.read_parquet(filepath)
                    except:  # noqa: E722
                        log.warning("Bad parquet file %s", filepath)
                    else:
                        analyze = partial(analyze_parquet_row,
                                          filepath=filepath)
                        for _, row in data.iterrows():
                            progress.total += 1
                            pool.submit(analyze, row)
                    progress.update(1)
        else:
            with progress:
                for filepath in inputs:
                    pool.submit(analyze_code_file, filepath)
    finally:
        pool.shutdown()
    if errors:
        return 1
    reserved.discard("")
    log.info("Internal types: %d", len(internal_types))
    log.info("UAST roles: %d", len(roles))
    log.info("Reserved: %d", len(reserved))

    roles = {bblfsh.role_name(role_id): n for role_id, n in roles.items()}
    generate_files(args.output, internal_types, roles, reserved)
Code example #12
File: test.py  Project: zurk/style-analyzer
 def setUp(self):
     slogging.setup("DEBUG", False)
     self.bblfsh_endpoint = "0.0.0.0:9432"
Code example #13
 def setUpClass(cls):
     slogging.setup(logging.DEBUG, False)
Code example #14
def main(args):
    """Entry point for quality report generation."""
    os.makedirs(args.output, exist_ok=True)
    assert os.path.isdir(args.output), "Output should be a directory"
    slogging.setup(args.log_level, False)
    log = logging.getLogger("QualityAnalyzer")
    handler = logging.handlers.RotatingFileHandler(
        os.path.join(args.output, "errors.txt"))
    handler.setLevel(logging.ERROR)
    log.addHandler(handler)
    if not server.exefile.exists():
        server.fetch()  # download executable
    # prepare output directory
    reports = []

    port = server.find_port()
    review_config = {QualityReportAnalyzer.name: {"aggregate": True}}
    train_config = json.loads(args.train_config)

    with tempfile.TemporaryDirectory() as tmpdirname:
        database = args.database if args.database else os.path.join(
            tmpdirname, "db.sqlite3")
        fs = args.fs if args.fs else os.path.join(tmpdirname, "models")
        os.makedirs(fs, exist_ok=True)
        with AnalyzerContextManager(
                port=port,
                db=database,
                fs=fs,
                analyzer="lookout.style.format.benchmarks.general_report",
                init=False):
            start_time = datetime.now()
            for ri, repo in enumerate(REPOSITORIES):
                repo, to_commit, from_commit = repo.split()
                now = datetime.now()
                if ri > 0:
                    left = (len(REPOSITORIES) - ri) / ri * (now - start_time)
                else:
                    left = None
                log.info(
                    "\n%s\n"
                    "= %-76s =\n"
                    "= %2d / %2d%s=\n"
                    "= Now:  %-60s%s=\n"
                    "= Left: %-40s%s=\n"
                    "= Ends: %-60s%s=\n"
                    "%s",
                    "=" * 80,
                    repo,
                    ri + 1,
                    len(REPOSITORIES),
                    " " * 70,
                    now,
                    " " * 11,
                    left,
                    " " * 31,
                    now + left if left is not None else None,
                    " " * 11,
                    "=" * 80,
                )
                report_loc = os.path.join(args.output, get_repo_name(repo))
                quality_rep_loc = report_loc + ".quality_report.md"
                model_rep_loc = report_loc + ".model_report.md"
                # generate or read report
                try:
                    if args.force or not os.path.exists(quality_rep_loc) or \
                            not os.path.exists(model_rep_loc):
                        # Skip this step if report was already generated
                        report = measure_quality(repo,
                                                 to_commit=to_commit,
                                                 from_commit=from_commit,
                                                 port=port,
                                                 review_config=review_config,
                                                 train_config=train_config,
                                                 bblfsh=args.bblfsh)
                        if report.quality is not None:
                            with open(quality_rep_loc, "w",
                                      encoding="utf-8") as f:
                                f.write(report.quality)
                        if report.model is not None:
                            with open(model_rep_loc, "w",
                                      encoding="utf-8") as f:
                                f.write(report.model)
                    else:
                        report = QualityReport()
                        with open(quality_rep_loc, encoding="utf-8") as f:
                            report.quality = f.read()
                        with open(model_rep_loc, encoding="utf-8") as f:
                            report.model = f.read()
                    if report.quality is not None and report.model is not None:
                        reports.append((repo, report))
                    else:
                        log.warning("skipped %s: quality %s model %s", repo,
                                    report.quality is not None, report.model
                                    is not None)
                except Exception:
                    log.exception("-" * 20 + "\nFailed to process %s repo",
                                  repo)
                    continue

        # precision, recall, f1, support, n_rules, avg_len stats
        table = []
        fields2id = OrderedDict()
        additional_fields = ("Rules Number", "Average Rule Len")
        with io.StringIO() as output:
            for repo, report in reports:
                metrics = _get_metrics(report.quality)
                if not table:
                    table.append(("repo", ) + metrics._fields +
                                 additional_fields)
                    for i, field in enumerate(table[0]):
                        fields2id[field] = i
                n_rules, avg_len = _get_model_summary(report.model)
                table.append((get_repo_name(repo), ) + metrics +
                             (n_rules, avg_len))
            average = tuple(
                ("%" + FLOAT_PRECISION) % calc_avg(table[1:], fields2id[field])
                for field in metrics._fields)
            average += tuple(
                ("%" + FLOAT_PRECISION) % calc_avg(table[1:], fields2id[field])
                for field in additional_fields)
            fields_to_weight = (
                ("precision", "support"),
                ("recall", "support"),
                ("full_recall", "full_support"),
                ("f1", "support"),
                ("full_f1", "full_support"),
                ("ppcr", "support"),
            )
            weighted_average = []
            for field, weight_field in fields_to_weight:
                weighted_average.append(
                    ("%" + FLOAT_PRECISION) %
                    calc_weighted_avg(table[1:],
                                      col=fields2id[field],
                                      weight_col=fields2id[weight_field]))
            table.append(("Average", ) + average)
            table.append(("Weighted average", ) + tuple(weighted_average))
            float_fields = ("precision", "recall", "full_recall", "f1",
                            "full_f1", "ppcr")
            floatfmts = []
            for field in fields2id:
                if field in float_fields:
                    floatfmts.append(FLOAT_PRECISION)
                else:
                    floatfmts.append("g")

            print(tabulate(table,
                           tablefmt="pipe",
                           headers="firstrow",
                           floatfmt=floatfmts),
                  file=output)
            summary = output.getvalue()
        print(summary)
        summary_loc = os.path.join(args.output, "summary.md")
        with open(summary_loc, "w", encoding="utf-8") as f:
            f.write(summary)
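Since tabulate is called with tablefmt="pipe" and headers="firstrow", summary.md ends up holding a Markdown pipe table. Schematically, with invented numbers and most metric columns elided:

    | repo    | precision | recall | ... | Rules Number | Average Rule Len |
    |---------|-----------|--------|-----|--------------|------------------|
    | repoA   | 0.95      | 0.91   | ... | 150          | 4.2              |
    | Average | 0.95      | 0.91   | ... | 150          | 4.2              |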
Code example #15
 def setUpClass(cls):
     slogging.setup("INFO", False, "")