Exemple #1
0
 def setUp(self):
     """
     Prepare the per-test fixtures: config, feature extractor and annotated file.

     Reads ``self.file`` and sets ``self.final_config``, ``self.extractor`` and
     ``self.annotated_file``.
     """
     config = FormatAnalyzer._load_config(get_config())
     self.final_config = config["train"]["javascript"]
     self.extractor = FeatureExtractor(
         language="javascript", **self.final_config["feature_extractor"])
     # Loaded a single time: the result depends only on self.file, so loading it
     # again with the same argument would just repeat the work.
     self.annotated_file = AnnotationManager.from_file(self.file)
Exemple #2
0
    def generate_local_test(mcs, case_name, uast, contents):
        """
        Build a test method for the case stored under `case_name` in `cases`.

        :param mcs: First positional argument; unused in the body \
                    (presumably the metaclass - confirm against the caller).
        :param case_name: Key into `cases`; the entry unpacks to (offsets, y_pred, result).
        :param uast: UAST handed to `UnicodeFile` together with `contents`.
        :param contents: File contents handed to `UnicodeFile`.
        :return: Function taking `self` that regenerates the code and compares it to `result`.
        """
        js_config = FormatAnalyzer._load_config(get_config())["train"]["javascript"]
        extractor = FeatureExtractor(language="javascript",
                                     label_composites=label_composites,
                                     **js_config["feature_extractor"])
        source = UnicodeFile(content=contents, uast=uast, path="", language="")
        # Only the labeled virtual nodes are needed here to map offsets to label indices.
        _, _, (vnodes_y, _, _, _) = extractor.extract_features([source])
        offsets, y_pred, result = cases[case_name]

        def _test(self):
            """Override the labels at the case offsets and check the generated code."""
            labels = deepcopy(self.y)
            for offset, label in zip(offsets, y_pred):
                # If no vnode starts at `offset`, the loop runs to completion and
                # `position` stays at the last index; with no vnodes it stays None.
                position = None
                for position, vnode in enumerate(vnodes_y):  # noqa: B007
                    if offset == vnode.start.offset:
                        break
                labels[position] = label
            generator = CodeGenerator(self.feature_extractor)
            every_index = list(range(len(self.vnodes_y)))
            pred_vnodes = generator.apply_predicted_y(
                self.vnodes, self.vnodes_y, every_index, FakeRules(labels))
            self.assertEqual(generator.generate(pred_vnodes), result)

        return _test
Exemple #3
0
 def setUpClass(cls):
     """
     Extract features from the benchmark file once and cache per-kind node features.

     The default config is merged with narrow feature extractor overrides before the
     extractor is built. The same benchmark file is passed to the extractor twice.
     """
     overrides = {
         "train": {
             "javascript": {
                 "feature_extractor": {
                     "left_siblings_window": 1,
                     "right_siblings_window": 1,
                     "parents_depth": 1,
                     "node_features": ["start_line", "reserved", "roles"],
                 },
             },
         },
     }
     merged = merge_dicts(get_config(), overrides)
     train_config = FormatAnalyzer._load_config(merged)["train"]
     here = Path(__file__).parent
     with lzma.open(str(here / "benchmark.js.xz"), mode="rt") as fin:
         contents = fin.read()
     with lzma.open(str(here / "benchmark.uast.xz")) as fin:
         uast = bblfsh.Node.FromString(fin.read())
     benchmark = File(content=bytes(contents, "utf-8"), uast=uast)
     cls.fe = FeatureExtractor(language="javascript",
                               **train_config["javascript"]["feature_extractor"])
     cls.fe.extract_features([benchmark, benchmark])
     cls.class_representations = cls.fe.composite_class_representations
     cls.n_classes = len(cls.fe.labels_to_class_sequences)
     # One node feature per name listed in the "node_features" override above.
     cls.ordinal = cls.return_node_feature(FeatureId.start_line)
     cls.categorical = cls.return_node_feature(FeatureId.reserved)
     cls.bag = cls.return_node_feature(FeatureId.roles)
Exemple #4
0
 def setUpClass(cls):
     """
     Set up logging, the Babelfish client, the fake data service and the extractor config.

     The feature extractor config is loaded with `cutoff_label_support` overridden to 0
     through the `language_defaults` section.
     """
     slogging_setup("DEBUG", False)
     cls.language = "javascript"
     cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")
     cls.data_service = FakeDataService(cls.bblfsh_client, files=None, changes=None)
     cls.stub = cls.data_service.get_bblfsh()
     overrides = {
         "train": {"language_defaults": {"feature_extractor": {"cutoff_label_support": 0}}},
     }
     loaded = FormatAnalyzer._load_config(overrides)
     cls.config = loaded["train"][cls.language]["feature_extractor"]
Exemple #5
0
 def setUpClass(cls):
     """Load the compressed benchmark file and build the JavaScript feature extractor."""
     here = Path(__file__).parent
     # lzma.open() gets str paths for Python 3.5 compatibility.
     js_archive = str(here / "benchmark.js.xz")
     uast_archive = str(here / "benchmark.uast.xz")
     with lzma.open(js_archive, mode="rt") as fin:
         contents = fin.read()
     with lzma.open(uast_archive) as fin:
         uast = bblfsh.Node.FromString(fin.read())
     cls.files = [File(content=bytes(contents, "utf-8"), uast=uast)]
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_kwargs = train_config["javascript"]["feature_extractor"]
     cls.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)
Exemple #6
0
 def setUpClass(cls):
     """
     Parse jquery.layout.js with the Babelfish server and extract its features.

     Stores the raw code, its UAST, the labels and the (labeled) virtual nodes on the class.
     """
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_kwargs = train_config["javascript"]["feature_extractor"]
     cls.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)
     js_path = Path(__file__).parent / "jquery.layout.js"
     with open(str(js_path), mode="rb") as source:
         cls.code = source.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     response = client.parse(filename="", language="javascript", contents=cls.code)
     cls.uast = response.uast
     parsed_file = FakeFile(path="test.py",
                            content=cls.code,
                            uast=cls.uast,
                            language="JavaScript")
     # The feature matrix and both parent mappings are not kept.
     _, cls.y, (cls.vnodes_y, cls.vnodes, _, _) = \
         cls.extractor.extract_features([parsed_file])
 def setUpClass(cls):
     """
     Extract features from the small benchmark file and keep every output on the class.

     Sets `maxDiff`, `feature_extractor`, `file`, `X`, `y`, `vnodes_y`, `vnodes`,
     `vnode_parents` and `node_parents`.
     """
     cls.maxDiff = None
     here = Path(__file__).parent
     # lzma.open() gets str paths for Python 3.5 compatibility.
     js_archive = str(here / "benchmark_small.js.xz")
     uast_archive = str(here / "benchmark_small.js.uast.xz")
     with lzma.open(js_archive, mode="rt") as fin:
         contents = fin.read()
     with lzma.open(uast_archive) as fin:
         uast = bblfsh.Node.FromString(fin.read())
     js_config = FormatAnalyzer._load_config(get_config())["train"]["javascript"]
     cls.feature_extractor = FeatureExtractor(language="javascript",
                                              label_composites=label_composites,
                                              **js_config["feature_extractor"])
     cls.file = File(content=bytes(contents, "utf-8"), uast=uast)
     extracted = cls.feature_extractor.extract_features([cls.file])
     cls.X, cls.y, (cls.vnodes_y, cls.vnodes, cls.vnode_parents, cls.node_parents) = extracted
Exemple #8
0
def train(training_dir: str,
          ref: ReferencePointer,
          output_path: str,
          language: str,
          bblfsh: str,
          config: Optional[Union[str, dict]],
          log: Optional[logging.Logger] = None) -> FormatModel:
    """
    Train a FormatModel on the files of a local directory and save it; a debugging helper.

    :param training_dir: Directory that is searched recursively for training files.
    :param ref: Reference pointer passed to the training routine.
    :param output_path: Where the trained model is saved.
    :param language: Language used to select the config section and to parse the files.
    :param bblfsh: Address of the Babelfish server.
    :param config: Path to a YAML config file, an already loaded config mapping, \
                   or None for an empty config.
    :param log: Logger forwarded to the file parsing step.
    :return: The trained FormatModel (already written to `output_path`).
    """
    bblfsh_client = BblfshClient(bblfsh)
    if config is None:
        config = {}
    elif isinstance(config, str):
        # A string is treated as the path of a YAML file.
        with open(config) as fh:
            config = safe_load(fh)
    config = FormatAnalyzer._load_config(config)
    # NOTE(review): the pattern is fixed to "*.js" whatever `language` is - confirm
    # whether other languages are expected to work here.
    pattern = os.path.join(training_dir, "**", "*.js")
    filepaths = glob.glob(pattern, recursive=True)
    language_config = config["train"][language]
    files = parse_files(filepaths=filepaths,
                        line_length_limit=language_config["line_length_limit"],
                        overall_size_limit=language_config["overall_size_limit"],
                        client=bblfsh_client,
                        language=language,
                        log=log)
    data_service = FakeDataService(bblfsh_client=bblfsh_client, files=files, changes=None)
    model = FormatAnalyzer.train(ref, config, data_service)
    model.save(output_path)
    return model
Exemple #9
0
 def setUp(self):
     """Create the JavaScript feature extractor used by each test."""
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_kwargs = train_config["javascript"]["feature_extractor"]
     self.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)
Exemple #10
0
    def test_generate_new_line(self):
        """
        Check the lines produced by `CodeGenerator.generate_new_line` for each listed case.

        For every case the labels at the case offsets are overridden, the predictions are
        applied, and one new line is generated per group of line nodes.
        """
        self.maxDiff = None
        # Case name -> expected generated lines; None disables the comparison for the case.
        expected_res = {
            "nothing changed": [],
            "remove new line in the end of 4th line": None,
            "indentation in the beginning":
                [" import { makeToast } from '../../common/app/Toasts/redux';"],
            "remove indentation in the 4th line till the end":
                [" return Object.keys(flash)", " }"],
            "new line between 6th and 7th regular code lines":
                ["\n      return messages.map(message => ({"],
            "new line in the middle of the 7th code line with indentation increase":
                ["      return messages\n        .map(message => ({", "  })"],
            "new line in the middle of the 7th code line with indentation decrease":
                ["      return messages\n    .map(message => ({", "      })"],
            "new line in the middle of the 7th code line without indentation increase":
                ["      return messages\n      .map(message => ({"],
            "change quotes":
                ['import { makeToast } from "../../common/app/Toasts/redux";'],
            "remove indentation decrease 11th line": ["        }));"],
            "change indentation decrease to indentation increase 11th line":
                ["          }));"],
            "change indentation decrease to indentation increase 11th line but keep the rest":
                ["          }));", "})"],
        }

        here = Path(__file__).parent
        # lzma.open() gets str paths for Python 3.5 compatibility.
        with lzma.open(str(here / "benchmark_small.js.xz"), mode="rt") as fin:
            contents = fin.read()
        with lzma.open(str(here / "benchmark_small.js.uast.xz")) as fin:
            uast = bblfsh.Node.FromString(fin.read())
        js_config = FormatAnalyzer._load_config(get_config())["train"]["javascript"]

        for case, expected in expected_res.items():
            offsets, y_pred, _ = cases[case]
            # A new extractor and a new file object are created for every case.
            extractor = FeatureExtractor(language="javascript",
                                         label_composites=label_composites,
                                         **js_config["feature_extractor"])
            source = UnicodeFile(content=contents, uast=uast, path="", language="")
            _, y, (vnodes_y, vnodes, _, _) = extractor.extract_features([source])
            y_cur = deepcopy(y)
            for offset, label in zip(offsets, y_pred):
                # If no vnode starts at `offset`, the loop runs to completion and
                # `position` stays at the last index; with no vnodes it stays None.
                position = None
                for position, vnode in enumerate(vnodes_y):  # noqa: B007
                    if offset == vnode.start.offset:
                        break
                y_cur[position] = label
            generator = CodeGenerator(extractor)
            pred_vnodes = generator.apply_predicted_y(
                vnodes, vnodes_y, list(range(len(vnodes_y))), FakeRules(y_cur))
            line_groups = FormatAnalyzer._group_line_nodes(
                y, y_cur, vnodes_y, pred_vnodes, [1] * len(y))
            # Only the line's vnodes (fourth item of the inner tuple) feed the generator.
            res = [generator.generate_new_line(line_vnodes)
                   for _, (_, _, _, line_vnodes, _) in line_groups]
            if expected is not None:
                # None marks a case where lines get deleted; that is not handled
                # properly yet, so the comparison is skipped.
                self.assertEqual(res, expected, case)