def analyze_files(
    analyzer_type: Type[FormatAnalyzer],
    config: dict,
    model_path: str,
    language: str,
    bblfsh_addr: str,
    input_pattern: str,
    log: logging.Logger,
) -> List[Comment]:
    """Run the model, record the fixes for each file and return them."""
    class FakePointer:
        """Stand-in for a ReferencePointer; analyze() only calls to_pb() on it."""

        def to_pb(self):
            return None

    model = FormatModel().load(model_path)
    if language not in model:
        raise NotFittedError()
    rules = model[language]
    client = bblfsh.BblfshClient(bblfsh_addr)
    files = parse_files(
        filepaths=glob.glob(input_pattern, recursive=True),
        line_length_limit=rules.origin_config["line_length_limit"],
        overall_size_limit=rules.origin_config["overall_size_limit"],
        client=client,
        language=language,
        log=log)
    log.info("Model parameters: %s" % rules.origin_config)
    log.info("Rules stats: %s" % rules)
    log.info("Number of files: %s" % (len(files)))
    return analyzer_type(model, input_pattern, config).analyze(
        FakePointer(), None, data_service=FakeDataService(client, files, []))
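# A minimal, hypothetical invocation of analyze_files(). The model path,
# Babelfish endpoint and glob pattern are placeholders, the FormatAnalyzer
# import path is assumed from this project's layout, and the Comment fields
# printed at the end are assumed from the lookout SDK.
import logging

from lookout.style.format.analyzer import FormatAnalyzer

logging.basicConfig(level=logging.INFO)
comments = analyze_files(
    analyzer_type=FormatAnalyzer,
    config={},
    model_path="style.format.analyzer.FormatAnalyzer.asdf",
    language="javascript",
    bblfsh_addr="0.0.0.0:9432",
    input_pattern="src/**/*.js",
    log=logging.getLogger("analyze_files"))
for comment in comments:
    print("%s:%d: %s" % (comment.file, comment.line, comment.text))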
Example #2
def files2mispreds(filepaths: Iterable[str], feature_extractor: FeatureExtractor, rules: Rules,
                   client: BblfshClient, log: logging.Logger) -> Iterable[Misprediction]:
    """
    Return the model's `Misprediction`-s on a list of files.

    :param filepaths: List of files to get `Misprediction`-s from.
    :param feature_extractor: FeatureExtractor to use.
    :param rules: Rules to use for prediction.
    :param client: Babelfish client. Babelfish server should be started accordingly.
    :param log: Logger.
    :return: List of `Misprediction`-s extracted from a given list of files.
    """
    files = parse_files(filepaths=filepaths,
                        line_length_limit=rules.origin_config["line_length_limit"],
                        overall_size_limit=rules.origin_config["overall_size_limit"],
                        client=client, language=feature_extractor.language)
    X, y, (vnodes_y, vnodes, vnode_parents, node_parents) = feature_extractor \
        .extract_features(files)
    y_pred, rule_winners, _, grouped_quote_predictions = rules.predict(
        X=X, vnodes_y=vnodes_y, vnodes=vnodes, feature_extractor=feature_extractor)
    y_pred = rules.fill_missing_predictions(y_pred, y)
    checker = UASTStabilityChecker(feature_extractor=feature_extractor)
    y, y_pred, vnodes_y, rule_winners, safe_preds = checker.check(
        y=y, y_pred=y_pred, vnodes_y=vnodes_y, vnodes=vnodes, files=list(files), stub=client._stub,
        vnode_parents=vnode_parents, node_parents=node_parents, rule_winners=rule_winners,
        grouped_quote_predictions=grouped_quote_predictions)
    mispreds = get_mispreds(y, y_pred, vnodes_y, rule_winners)
    return mispreds
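# A hypothetical driver for files2mispreds(): load a trained FormatModel,
# take its rules for one language, and collect the mispredictions over a
# file tree. The model path and endpoint are placeholders, and building the
# FeatureExtractor from rules.origin_config["feature_extractor"] is an
# assumption about the stored config layout.
import glob
import logging

model = FormatModel().load("style.format.analyzer.FormatAnalyzer.asdf")
rules = model["javascript"]
feature_extractor = FeatureExtractor(
    language="javascript", **rules.origin_config["feature_extractor"])
client = BblfshClient("0.0.0.0:9432")
mispreds = list(files2mispreds(
    glob.glob("src/**/*.js", recursive=True), feature_extractor, rules,
    client, logging.getLogger("files2mispreds")))
print("%d mispredictions" % len(mispreds))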
Example #3
    def test_parse_files(self):
        class Log:
            """Stub logger that only records that debug() was called."""

            def debug(self, *args, **kwargs):
                nonlocal logged
                logged = True

        logged = False
        with NamedTemporaryFile(prefix="one", suffix=".js") as tmp1, \
                NamedTemporaryFile(prefix="two", suffix=".js") as tmp2:
            tmp1.write(b"hello")
            tmp1.seek(0)
            tmp2.write(b"world" * 100)
            tmp2.seek(0)
            bblfsh_client = BblfshClient("0.0.0.0:9432")
            try:
                filtered = parse_files(filepaths=[tmp1.name, tmp2.name],
                                       line_length_limit=80,
                                       overall_size_limit=5 << 20,
                                       client=bblfsh_client,
                                       language="javascript",
                                       log=Log())
                # tmp2 exceeds line_length_limit, so it is dropped and the
                # drop is reported through Log.debug, flipping `logged`.
                self.assertEqual(len(filtered), 1)
                self.assertEqual(filtered[0].content, b"hello")
                self.assertTrue(logged)
            finally:
                bblfsh_client._channel.close()
Example #4
def files2vnodes(filepaths: Iterable[str], feature_extractor: FeatureExtractor, rules: Rules,
                 client: BblfshClient) -> Iterable[VirtualNode]:
    """
    Return the `VirtualNode`-s extracted from a list of files.

    :param filepaths: List of files to get `Misprediction`-s and `VirtualNode`-s from.
    :param feature_extractor: FeatureExtractor to use.
    :param rules: Rules to use for prediction.
    :param client: Babelfish client. Babelfish server should be started accordingly.
    :return: List of `VirtualNode`-s extracted from a given list of files.
    """
    files = parse_files(filepaths=filepaths,
                        line_length_limit=rules.origin_config["line_length_limit"],
                        overall_size_limit=rules.origin_config["overall_size_limit"],
                        client=client, language=feature_extractor.language)
    _, _, (vnodes_y, _, _, _) = feature_extractor.extract_features(files)
    return vnodes_y
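# files2vnodes() shares the call pattern of files2mispreds(); reusing the
# hypothetical feature_extractor, rules and client from the sketch above:
vnodes_y = files2vnodes(glob.glob("src/**/*.js", recursive=True),
                        feature_extractor, rules, client)
print("%d labeled virtual nodes" % len(list(vnodes_y)))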
Example #5
    def test(self):
        fixes = []  # type: List[FileFix]
        bblfsh_client = BblfshClient(self.bblfsh_endpoint)
        basedir = os.path.dirname(__file__)
        base_files = parse_files(
            filepaths=[os.path.join(basedir, "find_chrome_base.js")],
            line_length_limit=500,
            overall_size_limit=5 << 20,
            client=bblfsh_client,
            language="javascript")
        base_files[0].path = os.path.join(basedir, "find_chrome_head.js")

        class Runner(FormatAnalyzerSpy):
            def analyze(self, ptr_from: ReferencePointer,
                        ptr_to: ReferencePointer, data_service: DataService,
                        **data) -> List[Comment]:
                """
                We run the analysis on the single pair of files: `find_chrome.js`.
                """
                # Minimal stand-ins for the Lookout data service: every
                # GetFiles() call yields the parsed base revision.
                class FakeStub:
                    def GetFiles(self, *args, **kwargs):
                        return base_files

                class FakeDataService:
                    def get_data(self):
                        return FakeStub()

                fixes.extend(
                    self.run(ptr_from, data_service, FakeDataService()))
                return []

        log = logging.getLogger(type(self).__name__)
        analyze_files(
            Runner, {},
            os.path.join(basedir,
                         "style.format.analyzer.FormatAnalyzer_1.asdf"),
            "javascript", self.bblfsh_endpoint,
            os.path.join(basedir, "find_chrome_head.js"), log)
        self.assertEqual(len(fixes), 1)
        self.assertEqual(len(fixes[0].line_fixes), 1)
        fix = fixes[0].line_fixes[0]  # type: LineFix
        self.assertEqual(fix.line_number, 22)
        self.assertEqual(
            fix.suggested_code,
            "const execFileSync = require('child_process').execFileSync;")
Example #6
def train(training_dir: str,
          ref: ReferencePointer,
          output_path: str,
          language: str,
          bblfsh: str,
          config: Optional[Union[str, dict]],
          log: Optional[logging.Logger] = None) -> FormatModel:
    """
    Train a FormatModel for debugging purposes.

    :param training_dir: Path to the directory containing the files to train from.
    :param ref: Reference pointer to the repository to train on.
    :param output_path: Path to the model to write.
    :param language: Language to filter on.
    :param bblfsh: Address of the babelfish server.
    :param config: Path to a YAML config to use during the training or \
                   json-like object with a config.
    :param log: Logger used to report progress during training.
    :return: Trained FormatModel.
    """
    bblfsh_client = BblfshClient(bblfsh)
    if config is not None:
        if isinstance(config, str):
            with open(config) as fh:
                config = safe_load(fh)
    else:
        config = {}
    config = FormatAnalyzer._load_config(config)
    filepaths = glob.glob(os.path.join(training_dir, "**", "*.js"),
                          recursive=True)
    model = FormatAnalyzer.train(
        ref, config,
        FakeDataService(
            bblfsh_client=bblfsh_client,
            files=parse_files(
                filepaths=filepaths,
                line_length_limit=config["train"][language]["line_length_limit"],
                overall_size_limit=config["train"][language]["overall_size_limit"],
                client=bblfsh_client,
                language=language,
                log=log),
            changes=None))
    model.save(output_path)
    return model
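# A hypothetical end-to-end call of train(); every literal below (directory,
# repository URL, endpoint, output path) is a placeholder, and the
# ReferencePointer(url, ref, commit) field order is assumed from the lookout
# SDK.
import logging

ref = ReferencePointer("https://github.com/example/repo",
                       "refs/heads/master",
                       "0" * 40)  # placeholder commit hash
model = train(training_dir="/tmp/training-repo",
              ref=ref,
              output_path="/tmp/format-model.asdf",
              language="javascript",
              bblfsh="0.0.0.0:9432",
              config=None,  # fall back to FormatAnalyzer's default config
              log=logging.getLogger("train"))
print(model)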