def setUp(self):
    """
    Prepare the JavaScript config, the feature extractor and the annotated file for a test.

    Reads `self.file`, which has to be available before this hook runs, and stores \
    `final_config`, `extractor` and `annotated_file` on the instance.
    """
    config = FormatAnalyzer._load_config(get_config())
    self.final_config = config["train"]["javascript"]
    self.extractor = FeatureExtractor(
        language="javascript", **self.final_config["feature_extractor"])
    # One AnnotationManager is enough; building it a second time only repeats the work.
    self.annotated_file = AnnotationManager.from_file(self.file)
def generate_local_test(mcs, case_name, uast, contents):
    """
    Build a test function for a single entry of the module-level `cases` mapping.

    :param mcs: Not used in the body; presumably the metaclass this generator belongs to.
    :param case_name: Key in `cases`; the entry unpacks to offsets, labels to inject and \
                      the expected generated file.
    :param uast: UAST handed to UnicodeFile.
    :param contents: File contents handed to UnicodeFile.
    :return: Function taking `self` which overrides labels at the given offsets, generates \
             the file and asserts that it equals the expected result.
    """
    js_config = FormatAnalyzer._load_config(get_config())["train"]["javascript"]
    extractor = FeatureExtractor(language="javascript",
                                 label_composites=label_composites,
                                 **js_config["feature_extractor"])
    source = UnicodeFile(content=contents, uast=uast, path="", language="")
    _, _, extras = extractor.extract_features([source])
    labeled_vnodes, _, _, _ = extras
    target_offsets, new_labels, expected = cases[case_name]

    def _test(self):
        labels = deepcopy(self.y)
        for target_offset, new_label in zip(target_offsets, new_labels):
            # `index` ends on the first vnode starting at `target_offset`; when nothing
            # matches it is left at the last index (or None for an empty sequence).
            index = None
            for index, vnode in enumerate(labeled_vnodes):  # noqa: B007
                if target_offset == vnode.start.offset:
                    break
            labels[index] = new_label
        generator = CodeGenerator(self.feature_extractor)
        all_indices = list(range(len(self.vnodes_y)))
        predicted_vnodes = generator.apply_predicted_y(
            self.vnodes, self.vnodes_y, all_indices, FakeRules(labels))
        produced = generator.generate(predicted_vnodes)
        self.assertEqual(produced, expected)

    return _test
def setUpClass(cls):
    """
    Run feature extraction on the benchmark file once and cache the results on the class.

    The extractor is configured with one sibling on each side, one parent level and only \
    the `start_line`, `reserved` and `roles` node features.
    """
    overrides = {
        "train": {
            "javascript": {
                "feature_extractor": {
                    "left_siblings_window": 1,
                    "right_siblings_window": 1,
                    "parents_depth": 1,
                    "node_features": ["start_line", "reserved", "roles"],
                },
            },
        },
    }
    train_config = FormatAnalyzer._load_config(merge_dicts(get_config(), overrides))["train"]

    fixtures_dir = Path(__file__).parent
    with lzma.open(str(fixtures_dir / "benchmark.js.xz"), mode="rt") as code_in:
        source_text = code_in.read()
    with lzma.open(str(fixtures_dir / "benchmark.uast.xz")) as uast_in:
        tree = bblfsh.Node.FromString(uast_in.read())
    benchmark = File(content=bytes(source_text, "utf-8"), uast=tree)

    extractor_kwargs = train_config["javascript"]["feature_extractor"]
    cls.fe = FeatureExtractor(language="javascript", **extractor_kwargs)
    # The same file object is deliberately supplied twice.
    cls.fe.extract_features([benchmark, benchmark])

    cls.class_representations = cls.fe.composite_class_representations
    cls.n_classes = len(cls.fe.labels_to_class_sequences)
    # One representative feature for each of the three names used below.
    cls.ordinal = cls.return_node_feature(FeatureId.start_line)
    cls.categorical = cls.return_node_feature(FeatureId.reserved)
    cls.bag = cls.return_node_feature(FeatureId.roles)
def setUpClass(cls):
    """
    Set up logging, the Babelfish client, a fake data service and the extractor config.

    The feature extractor config is loaded with `cutoff_label_support` overridden to 0 \
    through `language_defaults`.
    """
    slogging_setup("DEBUG", False)
    cls.language = "javascript"

    client = bblfsh.BblfshClient("0.0.0.0:9432")
    cls.bblfsh_client = client
    service = FakeDataService(client, files=None, changes=None)
    cls.data_service = service
    cls.stub = service.get_bblfsh()

    overrides = {
        "train": {"language_defaults": {"feature_extractor": {"cutoff_label_support": 0}}},
    }
    loaded = FormatAnalyzer._load_config(overrides)
    cls.config = loaded["train"][cls.language]["feature_extractor"]
def setUpClass(cls):
    """
    Load the compressed benchmark file with its UAST and create the JavaScript extractor.

    Stores the single-element `files` list and the `extractor` on the class.
    """
    fixtures_dir = Path(__file__).parent
    # Paths are converted with str() because Python 3.5 needs it.
    code_path = str(fixtures_dir / "benchmark.js.xz")
    with lzma.open(code_path, mode="rt") as code_in:
        source_text = code_in.read()
    uast_path = str(fixtures_dir / "benchmark.uast.xz")
    with lzma.open(uast_path) as uast_in:
        tree = bblfsh.Node.FromString(uast_in.read())
    cls.files = [File(content=bytes(source_text, "utf-8"), uast=tree)]

    train_config = FormatAnalyzer._load_config(get_config())["train"]
    extractor_kwargs = train_config["javascript"]["feature_extractor"]
    cls.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)
def setUpClass(cls):
    """
    Parse `jquery.layout.js` with Babelfish and extract its features once per class.

    Stores `extractor`, `code`, `uast`, `y`, `vnodes_y` and `vnodes` on the class; the \
    remaining parts of the extraction output are discarded.
    """
    train_config = FormatAnalyzer._load_config(get_config())["train"]
    extractor_kwargs = train_config["javascript"]["feature_extractor"]
    cls.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)

    code_path = Path(__file__).parent / "jquery.layout.js"
    with open(str(code_path), mode="rb") as code_in:
        cls.code = code_in.read()

    client = bblfsh.BblfshClient("0.0.0.0:9432")
    response = client.parse(filename="", language="javascript", contents=cls.code)
    cls.uast = response.uast

    sample = FakeFile(path="test.py", content=cls.code, uast=cls.uast, language="JavaScript")
    extracted = cls.extractor.extract_features([sample])
    _, cls.y, (cls.vnodes_y, cls.vnodes, _, _) = extracted
def setUpClass(cls):
    """
    Extract features from the small benchmark file once and keep every output on the class.

    Stores `feature_extractor`, `file`, `X`, `y`, `vnodes_y`, `vnodes`, `vnode_parents` \
    and `node_parents`, and disables the assertion diff length limit.
    """
    cls.maxDiff = None
    fixtures_dir = Path(__file__).parent
    # Paths are converted with str() because Python 3.5 needs it.
    code_path = str(fixtures_dir / "benchmark_small.js.xz")
    with lzma.open(code_path, mode="rt") as code_in:
        source_text = code_in.read()
    uast_path = str(fixtures_dir / "benchmark_small.js.uast.xz")
    with lzma.open(uast_path) as uast_in:
        tree = bblfsh.Node.FromString(uast_in.read())

    js_config = FormatAnalyzer._load_config(get_config())["train"]["javascript"]
    cls.feature_extractor = FeatureExtractor(language="javascript",
                                             label_composites=label_composites,
                                             **js_config["feature_extractor"])
    cls.file = File(content=bytes(source_text, "utf-8"), uast=tree)

    extracted = cls.feature_extractor.extract_features([cls.file])
    cls.X, cls.y, (cls.vnodes_y, cls.vnodes, cls.vnode_parents, cls.node_parents) = extracted
def train(training_dir: str, ref: ReferencePointer, output_path: str, language: str, bblfsh: str,
          config: Optional[Union[str, dict]], log: Optional[logging.Logger] = None,
          ) -> FormatModel:
    """
    Train a FormatModel for debugging purposes.

    :param training_dir: Path to the directory containing the files to train from. \
                         Only `*.js` files are collected, recursively.
    :param ref: Reference pointer to repository for training.
    :param output_path: Path to the model to write.
    :param language: Language to filter on; also selects the section of the train config \
                     that provides the line length and overall size limits.
    :param bblfsh: Address of the babelfish server.
    :param config: Path to a YAML config to use during the training or \
                   json-like object with a config. None means no overrides.
    :param log: Logger used to report during training.
    :return: Trained FormatModel.
    """
    bblfsh_client = BblfshClient(bblfsh)
    if config is None:
        config = {}
    elif isinstance(config, str):
        with open(config) as fh:
            # An empty YAML file loads as None; fall back to an empty config in that case.
            config = safe_load(fh) or {}
    config = FormatAnalyzer._load_config(config)
    language_config = config["train"][language]
    # NOTE(review): the glob is hard-coded to "*.js" regardless of `language`.
    filepaths = glob.glob(os.path.join(training_dir, "**", "*.js"), recursive=True)
    files = parse_files(filepaths=filepaths,
                        line_length_limit=language_config["line_length_limit"],
                        overall_size_limit=language_config["overall_size_limit"],
                        client=bblfsh_client,
                        language=language,
                        log=log)
    data_service = FakeDataService(bblfsh_client=bblfsh_client, files=files, changes=None)
    model = FormatAnalyzer.train(ref, config, data_service)
    model.save(output_path)
    return model
def setUp(self):
    """Create a new JavaScript FeatureExtractor from the loaded train config for each test."""
    loaded = FormatAnalyzer._load_config(get_config())
    extractor_kwargs = loaded["train"]["javascript"]["feature_extractor"]
    self.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)
def test_generate_new_line(self):
    """
    Check the lines produced by `CodeGenerator.generate_new_line` for every listed case.

    For each case the labels at the case's offsets are overridden, the predicted vnodes \
    are grouped by line, and the regenerated lines are compared with the expected list.
    """
    self.maxDiff = None
    # Maps a case name (a key of the module-level `cases`) to the expected list of
    # regenerated lines. None disables the comparison for that case.
    # NOTE(review): whitespace inside these strings is significant, they are compared
    # verbatim with the generated lines.
    expected_res = {
        "nothing changed": [],
        "remove new line in the end of 4th line": None,
        "indentation in the beginning":
            [" import { makeToast } from '../../common/app/Toasts/redux';"],
        "remove indentation in the 4th line till the end":
            [" return Object.keys(flash)", " }"],
        "new line between 6th and 7th regular code lines":
            ["\n return messages.map(message => ({"],
        "new line in the middle of the 7th code line with indentation increase":
            [" return messages\n .map(message => ({", " })"],
        "new line in the middle of the 7th code line with indentation decrease":
            [" return messages\n .map(message => ({", " })"],
        "new line in the middle of the 7th code line without indentation increase":
            [" return messages\n .map(message => ({"],
        "change quotes":
            ['import { makeToast } from "../../common/app/Toasts/redux";'],
        "remove indentation decrease 11th line":
            [" }));"],
        "change indentation decrease to indentation increase 11th line":
            [" }));"],
        "change indentation decrease to indentation increase 11th line but keep the rest":
            [" }));", "})"],
    }

    base = Path(__file__).parent
    # str() is needed for Python 3.5
    with lzma.open(str(base / "benchmark_small.js.xz"), mode="rt") as fin:
        contents = fin.read()
    with lzma.open(str(base / "benchmark_small.js.uast.xz")) as fin:
        uast = bblfsh.Node.FromString(fin.read())
    config = FormatAnalyzer._load_config(get_config())
    fe_config = config["train"]["javascript"]

    for case in expected_res:
        # The expected file stored in `cases` is not used here, only offsets and labels.
        offsets, y_pred, _ = cases[case]
        # A new extractor is created and features are re-extracted for every case.
        feature_extractor = FeatureExtractor(
            language="javascript", label_composites=label_composites,
            **fe_config["feature_extractor"])
        file = UnicodeFile(content=contents, uast=uast, path="", language="")
        X, y, (vnodes_y, vnodes, vnode_parents, node_parents) = \
            feature_extractor.extract_features([file])
        # Work on a copy so that `y` keeps the original labels for the grouping below.
        y_cur = deepcopy(y)
        for offset, yi in zip(offsets, y_pred):
            # `i` ends on the first vnode whose start offset equals `offset`; when nothing
            # matches it is left at the last index (or None if vnodes_y is empty).
            i = None
            for i, vnode in enumerate(vnodes_y):  # noqa: B007
                if offset == vnode.start.offset:
                    break
            y_cur[i] = yi
        code_generator = CodeGenerator(feature_extractor)
        pred_vnodes = code_generator.apply_predicted_y(
            vnodes, vnodes_y, list(range(len(vnodes_y))), FakeRules(y_cur))
        res = []
        # The last argument is a list of ones with one entry per label.
        # NOTE(review): presumably these are the rule winners - confirm against
        # FormatAnalyzer._group_line_nodes.
        for gln in FormatAnalyzer._group_line_nodes(
                y, y_cur, vnodes_y, pred_vnodes, [1] * len(y)):
            line, (line_y, line_y_pred, line_vnodes_y, line_vnodes, line_rule_winners) = gln
            new_code_line = code_generator.generate_new_line(line_vnodes)
            res.append(new_code_line)
        if expected_res[case] is not None:
            # None means that some lines are deleted. This is not handled properly yet.
            self.assertEqual(res, expected_res[case], case)