예제 #1
0
 def test_positions(self):
     """Check that parsed nodes never go backwards in start line or start offset."""
     js_path = Path(__file__).parent / "browser-policy-content.js"
     with open(str(js_path), mode="rb") as fin:
         code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code).uast
     converter = BytesToUnicodeConverter(code)
     unicode_file = UnicodeFile(content=converter.convert_content(),
                                uast=converter.convert_uast(uast),
                                language="javascript",
                                path="test.js")
     annotated_data = AnnotationManager.from_file(unicode_file)
     self.extractor._parse_file(annotated_data)
     nodes, _ = file_to_old_parse_file_format(annotated_data)
     # Compare every node with its direct successor.
     for index, (current, following) in enumerate(zip(nodes, nodes[1:])):
         pair = (index, index + 1)
         self.assertLessEqual(
             current.start.line, following.start.line,
             "Start line position decrease for %d, %d nodes" % pair)
         self.assertLessEqual(
             current.start.offset, following.start.offset,
             "Start offset position decrease for %d, %d nodes" % pair)
예제 #2
0
def analyze_files(
    analyzer_type: Type[FormatAnalyzer],
    config: dict,
    model_path: str,
    language: str,
    bblfsh_addr: str,
    input_pattern: str,
    log: logging.Logger,
) -> List[Comment]:
    """
    Load a format model, parse the files matching a glob pattern and analyze them.

    :param analyzer_type: Analyzer class; instantiated with the model, the pattern and config.
    :param config: Configuration passed to the analyzer constructor.
    :param model_path: Path the `FormatModel` is loaded from.
    :param language: Language whose rules must be present in the model.
    :param bblfsh_addr: Address of the bblfsh server.
    :param input_pattern: Glob pattern (expanded recursively) selecting the files to analyze.
    :param log: Logger used for progress messages.
    :return: Whatever the analyzer's `analyze()` call returns.
    :raises NotFittedError: If the model does not contain `language`.
    """
    class FakePointer:
        """Pointer stand-in whose `to_pb()` yields None."""

        def to_pb(self):
            return None

    model = FormatModel().load(model_path)
    if language not in model:
        raise NotFittedError()
    rules = model[language]
    origin_config = rules.origin_config
    client = bblfsh.BblfshClient(bblfsh_addr)
    matched_paths = glob.glob(input_pattern, recursive=True)
    files = parse_files(
        filepaths=matched_paths,
        line_length_limit=origin_config["line_length_limit"],
        overall_size_limit=origin_config["overall_size_limit"],
        client=client,
        language=language,
        log=log)
    log.info("Model parameters: %s" % origin_config)
    log.info("Rules stats: %s" % rules)
    log.info("Number of files: %s" % (len(files)))
    analyzer = analyzer_type(model, input_pattern, config)
    pointer = FakePointer()
    data_service = FakeDataService(client, files, [])
    return analyzer.analyze(pointer, None, data_service=data_service)
예제 #3
0
 def setUpClass(cls):
     """Set up logging, a bblfsh client and the parsed base/head JavaScript fixtures."""
     logging.basicConfig(level=logging.INFO)
     logging.getLogger("IdTyposAnalyzer").setLevel(logging.DEBUG)
     fixtures_dir = Path(__file__).parent
     cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")

     def load_fixture(archive_name, file_name):
         """Decompress one fixture, parse it and wrap it into a one-element file list."""
         # str() is needed for Python 3.5
         with lzma.open(str(fixtures_dir / archive_name)) as fin:
             contents = fin.read()
         uast = cls.bblfsh_client.parse(file_name, contents=contents).uast
         return [
             FakeFile(path=file_name,
                      content=contents,
                      uast=uast,
                      language="Javascript")
         ]

     cls.base_files = load_fixture("test_base_file.js.xz", "test_base_file.js")
     cls.head_files = load_fixture("test_head_file.js.xz", "test_head_file.js")
     cls.ptr = ReferencePointer("someurl", "someref", "somecommit")
예제 #4
0
 def analyze_code_file(path: str):
     """
     Parse one file with bblfsh and pass its text and UAST to `analyze_uast`.

     Returns immediately, without touching the progress bar, once `errors` has been set.
     The first parsed file fixes `language`; later files reported with a different
     language are skipped with a warning. Any exception is logged and sets `errors`.

     :param path: Path of the source file to parse and analyze.
     """
     nonlocal errors
     if errors:
         return
     try:
         try:
             # Reuse the client already stored on `clients`, if there is one.
             client = clients.client
         except AttributeError:
             # No client stored yet: create one and remember it on `clients`.
             # NOTE(review): `clients` is presumably thread-local storage - confirm.
             client = bblfsh.BblfshClient(args.bblfsh)
             clients.client = client
         response = client.parse(path)
         nonlocal language
         if not language:
             # The first successfully parsed file decides the expected language.
             language = response.language
         elif language != response.language:
             log.warning("dropped %s - language mismatch %s != %s", path,
                         language, response.language)
             return
         content = Path(path).read_text()
         analyze_uast(path, content, response.uast, internal_types, roles,
                      reserved)
     except:  # noqa: E722
         # Deliberately broad: any failure is logged and stops further processing.
         log.exception("Parsing %s", path)
         errors = True
     finally:
         # Runs for success, language mismatch and failure alike.
         with progress_lock:
             progress.disable = False  # this is needed, do not remove
             progress.update(1)
예제 #5
0
    def __init__(self, language: str = "javascript", bblfsh_address: str = "0.0.0.0:9432"):
        """
        Construct a `CodeTokenizer`.

        :param language: Which language to extract features for. The lower-cased value is \
                         stored in `self.language`; the value as given is used to build the \
                         names of the language-specific modules.
        :param bblfsh_address: Address of bblfsh server. If None, no client is created \
                               (`self.client` is None) and the UAST has to be provided by \
                               the caller.
        :raises ImportError: If the mandatory `tokens` or `roles` module of the language \
                             cannot be imported.
        """
        self.language = language.lower()
        # import everything related to language
        self.tokens = importlib.import_module("tokenizer.langs.%s.tokens" % language)
        self.roles = importlib.import_module("tokenizer.langs.%s.roles" % language)
        try:
            # The module path used to contain a double dot ("tokenizer..langs"), which made
            # this import fail unconditionally and silently disabled all token unwrappers.
            self.token_unwrappers = importlib.import_module(
                "tokenizer.langs.%s.token_unwrappers" % language).TOKEN_UNWRAPPERS
        except ImportError:
            # It's normal for some languages not to have a token_unwrappers module.
            self.token_unwrappers = {}
        try:
            self.node_fixtures = importlib.import_module(
                "tokenizer.langs.%s.uast_fixers" % language).NODE_FIXTURES
        except ImportError:
            # It's normal for some languages not to have a uast_fixers module.
            self.node_fixtures = {}

        # Create instance of bblfsh client in case of bblfsh_address is not None.
        # If None - UAST has to be provided by client. The attribute is always defined so
        # that later accesses do not fail with AttributeError.
        self.client = None
        if bblfsh_address is not None:
            self.client = bblfsh.BblfshClient(bblfsh_address)
예제 #6
0
 def test_parse_file_exact_match(self):
     """Check that joining the values of all parsed nodes reproduces the source text."""
     xz_path = Path(__file__).parent / "for_parse_test.js.xz"
     with lzma.open(str(xz_path), mode="rt") as fin:
         code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code.encode()).uast
     nodes, parents = self.extractor._parse_file(code, uast, xz_path)
     restored = "".join(node.value for node in nodes)
     self.assertEqual(restored, code)
예제 #7
0
 def setUpClass(cls):
     """Set up logging, a bblfsh-backed fake data service and the feature extractor config."""
     slogging_setup("DEBUG", False)
     cls.language = "javascript"
     cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")
     cls.data_service = FakeDataService(cls.bblfsh_client, files=None, changes=None)
     cls.stub = cls.data_service.get_bblfsh()
     # Override only the label support cutoff of the feature extractor defaults.
     overrides = {
         "train": {"language_defaults": {"feature_extractor": {"cutoff_label_support": 0}}},
     }
     train_config = FormatAnalyzer._load_config(overrides)["train"]
     cls.config = train_config[cls.language]["feature_extractor"]
예제 #8
0
 def setUp(self):
     """Parse the Python sample in annotated mode and build the random walk generator."""
     self.bblfsh = bblfsh.BblfshClient("localhost:9432")
     response = self.bblfsh.parse(models.SOURCE_PY, mode=bblfsh.Modes.ANNOTATED)
     self.uast = response.uast
     walk_params = dict(
         p_explore_neighborhood=0.5,
         q_leave_neighborhood=0.5,
         n_walks=5,
         n_steps=19,
         node2index=FakeVocabulary(),
         seed=42,
     )
     self.uast2walk = Uast2RandomWalks(**walk_params)
예제 #9
0
    def test_extract_functions_from_uast(self):
        """Check that exactly three functions with the expected names are extracted."""
        uast = bblfsh.BblfshClient("localhost:9432").parse(MODER_FUNC).uast
        moder = Moder(mode="func")
        functions = list(moder.extract_functions_from_uast(uast))
        self.assertEqual(len(functions), 3)

        expected_names = ["func_a", "func_b", "func_c"]
        for function in functions:
            # The first element of each extracted item carries the function token.
            self.assertIn(function[0].token, expected_names)
예제 #10
0
 def test_vnode_positions(self):
     """Smoke test: parsing and classifying the jQuery layout file must not raise."""
     js_path = Path(__file__).parent / "jquery.layout.js"
     with open(str(js_path), mode="rb") as fin:
         code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code).uast
     text = code.decode("utf-8", "replace")
     parsed = list(self.extractor._parse_file(text, uast, js_path))
     nodes, parents = parsed
     # Just should not fail
     list(self.extractor._classify_vnodes(nodes, "filepath"))
예제 #11
0
 def __init__(self, source_file, cfg=None):
     """
     Parse `source_file` with bblfsh and build the internal tree structures from its UAST.

     :param source_file: Path of the file sent to the bblfsh server.
     :param cfg: Not used by this constructor; the client is created directly.
     """
     # A client obtained via cfg.ast_client() was used here before; it is disabled.
     tree = bblfsh.BblfshClient('localhost:9432').parse(source_file).uast
     self.id = 0
     self.nodes, self.nmap, self.visited = [], {}, []
     self.anytree = None
     self.__process__(tree, tree)
     self.__node_mapping__()
     self.anytree = self.__get_any_tree__()
예제 #12
0
 def setUpClass(cls):
     """Set up logging, load the benchmark UAST and the base/head file archives."""
     logging.basicConfig(level=logging.INFO)
     logging.getLogger("FormatAnalyzer").setLevel(logging.DEBUG)
     here = Path(__file__).parent
     # str() is needed for Python 3.5
     uast_archive = str(here / "benchmark.uast.xz")
     base_archive = str(here / "freecodecamp-base.tar.xz")
     head_archive = str(here / "freecodecamp-head.tar.xz")
     with lzma.open(uast_archive) as fin:
         cls.uast = bblfsh.Node.FromString(fin.read())
     cls.base_files = cls.get_files_from_tar(base_archive)
     cls.head_files = cls.get_files_from_tar(head_archive)
     cls.ptr = ReferencePointer("someurl", "someref", "somecommit")
     FeatureExtractor._log.level = logging.DEBUG
     cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")
예제 #13
0
파일: test.py 프로젝트: zurk/style-analyzer
 def test_vnode_positions(self):
     """Smoke test: parsing and classifying the jQuery layout file must not raise."""
     js_path = Path(__file__).parent / "jquery.layout.js"
     with open(str(js_path), mode="rb") as fin:
         code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code).uast
     raw_file = File(content=code, uast=uast, language="javascript", path="test.js")
     unicode_file = BytesToUnicodeConverter.convert_file(raw_file)
     annotated_data = AnnotationManager.from_file(unicode_file)
     self.extractor._parse_file(annotated_data)
     # Just should not fail
     self.extractor._classify_vnodes(annotated_data)
예제 #14
0
def main() -> None:
    """Parse the file named on the command line with bblfsh and pretty-print the check results.

    Parse errors are printed when the response status is non-zero; the checks are run on
    the returned UAST in either case.
    """
    args = parse_arguments()

    import bblfsh
    from pprint import pprint

    endpoint = args.ip + ":" + args.port
    parse_result = bblfsh.BblfshClient(endpoint).parse(args.file)
    if parse_result.status != 0:
        print(parse_result.errors)

    results = run_checks(args.checks, args.language, parse_result.uast)
    pprint(results)
예제 #15
0
 def test_parse_file_comment_after_regexp(self):
     """Check that the raw tokens of a snippet with a comment and a regexp rebuild the source."""
     code = b"x = // comment\n/<regexp>/;"
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code).uast
     raw_file = File(uast=uast, content=code, language="javascript", path="")
     annotated_file = AnnotationManager.from_file(
         BytesToUnicodeConverter.convert_file(raw_file))
     self.extractor._parse_file(annotated_file)
     raw_tokens = annotated_file.iter_by_type(RawTokenAnnotation)
     restored = "".join(annotated_file[token.span] for token in raw_tokens)
     self.assertEqual(restored, code.decode())
예제 #16
0
파일: utils.py 프로젝트: bzz/sonar-checks
def run_default_fixture(path: str,
                        check_fnc: CheckFnc,
                        conn_str: str = "0.0.0.0:9432",
                        silent: bool = False) -> Checks:
    """
    Run a check function on the UAST of its matching ``.java`` fixture.

    The language directory is the name of the directory that contains the source file of
    `check_fnc`. The fixture file name is the base name of `path` with its last three
    characters (presumably ``.py`` - confirm against callers) replaced by ``.java``.

    :param path: Path whose base name selects the fixture.
    :param check_fnc: Check function called with the parsed UAST.
    :param conn_str: Address of the bblfsh server.
    :param silent: If True, the result is not pretty-printed.
    :return: The value returned by `check_fnc`.
    """
    from pprint import pprint

    client = bblfsh.BblfshClient(conn_str)
    check_source = os.path.normpath(os.path.abspath(inspect.getfile(check_fnc)))
    language = check_source.split(os.sep)[-2]
    fixture_name = os.path.split(path)[1][:-3] + ".java"
    fixture_path = os.path.join(THIS_PATH, "fixtures", language, fixture_name)
    res = check_fnc(client.parse(fixture_path).uast)
    if not silent:
        pprint(res)

    return res
예제 #17
0
def main(data, lang, output):
    """
    Parse all files with the given extension and store each non-empty UAST as a binary file.

    Every file returned by ``recursive_glob(data, '*.<lang>')`` is parsed with bblfsh. When
    the UAST has at least one child, it is serialized to ``<output>/<file>_uast.bin``;
    missing parent directories are created.

    :param data: Root passed to `recursive_glob`.
    :param lang: File extension (without the dot) of the files to process.
    :param output: Directory prefix of the written ``*_uast.bin`` files.
    """
    client = bblfsh.BblfshClient("0.0.0.0:9432")
    files = recursive_glob(data, '*.%s' % lang)

    for file in files:
        print("Processing file: {}".format(file))
        uast = client.parse(file).uast
        if len(uast.children) == 0:
            # The UAST has no children, so there is nothing to store for this file.
            continue
        out_file = "%s/%s_uast.bin" % (output, file)
        print("Writing file %s" % out_file)
        # exist_ok=True replaces the os.path.exists() + os.makedirs() pair, which could
        # fail when the directory appeared between the check and the creation.
        os.makedirs(os.path.dirname(out_file), exist_ok=True)
        with open(out_file, 'wb') as o:
            o.write(uast.SerializeToString())
예제 #18
0
    def test_empty_strings(self):
        """Check that an empty and a non-empty string literal get the same label sequence."""
        config = deepcopy(self.final_config["feature_extractor"])
        config["cutoff_label_support"] = 0
        client = bblfsh.BblfshClient("0.0.0.0:9432")

        def labels_for(code: str) -> Sequence[Tuple[int, ...]]:
            """Parse `code` as JavaScript and return the `y` of every labeled vnode."""
            response = client.parse(filename="", language="javascript",
                                    contents=code.encode())
            extractor = FeatureExtractor(language="javascript", **config)
            unicode_file = UnicodeFile(content=code, uast=response.uast, path="",
                                       language="javascript")
            result = extractor.extract_features([unicode_file])
            if result is None:
                self.fail("Could not parse test code.")
            _, _, (vnodes_y, _, _, _) = result
            return [vnode.y for vnode in vnodes_y]

        empty_literal = labels_for("var a = '';")
        filled_literal = labels_for("var a = 'a';")
        self.assertEqual(empty_literal, filled_literal)
예제 #19
0
def main() -> None:
    """Parse the file named on the command line with bblfsh and print the check results as JSON.

    Parse errors are printed as JSON when the response status is non-zero; the checks are
    run on the returned UAST in either case.
    """
    args = parse_arguments()

    import bblfsh

    endpoint = args.ip + ":" + args.port
    parse_result = bblfsh.BblfshClient(endpoint).parse(args.file)
    if parse_result.status != 0:
        print(json.dumps(parse_result.errors))

    results = run_checks(args.checks,
                         args.language,
                         parse_result.uast,
                         json_result=False)
    print(json.dumps(results))
예제 #20
0
 def setUpClass(cls):
     """Build the JavaScript feature extractor and extract features from the jQuery layout file."""
     train_config = FormatAnalyzer._load_config(get_config())["train"]
     extractor_kwargs = train_config["javascript"]["feature_extractor"]
     cls.extractor = FeatureExtractor(language="javascript", **extractor_kwargs)
     js_path = Path(__file__).parent / "jquery.layout.js"
     with open(str(js_path), mode="rb") as fin:
         cls.code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     cls.uast = client.parse(filename="", language="javascript", contents=cls.code).uast
     parsed_file = FakeFile(path="test.py",
                            content=cls.code,
                            uast=cls.uast,
                            language="JavaScript")
     feature_extractor_output = cls.extractor.extract_features([parsed_file])
     # Only the labels and the vnode sequences are kept on the class.
     X, cls.y, (cls.vnodes_y, cls.vnodes, vnode_parents, node_parents) = \
         feature_extractor_output
예제 #21
0
 def test_positions(self):
     """Check that parsed nodes never go backwards in start line or start offset."""
     js_path = Path(__file__).parent / "browser-policy-content.js"
     with open(str(js_path), mode="rt") as fin:
         code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code.encode()).uast
     nodes, parents = self.extractor._parse_file(code, uast, js_path)
     # Compare every node with its direct successor.
     for index, (current, following) in enumerate(zip(nodes, nodes[1:])):
         pair = (index, index + 1)
         self.assertLessEqual(
             current.start.line, following.start.line,
             "Start line position decrease for %d, %d nodes" % pair)
         self.assertLessEqual(
             current.start.offset, following.start.offset,
             "Start offset position decrease for %d, %d nodes" % pair)
예제 #22
0
    def setUp(self):
        """Collect the check functions and fixture paths of every language and open a client."""
        self.languages = get_languages()
        self.check_funcs: Dict[str, Dict[str, Any]] = {}
        self.fixtures: Dict[str, List[str]] = {}

        fixtures_dir = get_fixtures_dir()

        for lang in self.languages:
            checks_by_path = {}
            for module, path in _get_check_modules(lang):
                checks_by_path[path] = module.check
            self.check_funcs[lang] = checks_by_path

            lang_dir = os.path.join(fixtures_dir, lang)
            self.fixtures[lang] = [
                os.path.join(lang_dir, entry) for entry in os.listdir(lang_dir)
            ]

        self.client = bblfsh.BblfshClient("0.0.0.0:9432")
예제 #23
0
 def test_parse_file_exact_match(self):
     """Check that both the old-format nodes and the raw tokens reproduce the source text."""
     xz_path = str(Path(__file__).parent / "for_parse_test.js.xz")
     with lzma.open(xz_path, mode="rb") as fin:
         code = fin.read()
     client = bblfsh.BblfshClient("0.0.0.0:9432")
     uast = client.parse(filename="", language="javascript", contents=code).uast
     raw_file = File(uast=uast, content=code, language="javascript", path="")
     annotated_file = AnnotationManager.from_file(
         BytesToUnicodeConverter.convert_file(raw_file))
     self.extractor._parse_file(annotated_file)
     nodes, _ = file_to_old_parse_file_format(annotated_file)
     from_nodes = "".join(node.value for node in nodes)
     self.assertEqual(from_nodes, code.decode())
     raw_tokens = annotated_file.iter_by_type(RawTokenAnnotation)
     from_tokens = "".join(annotated_file[token.span] for token in raw_tokens)
     self.assertEqual(from_tokens, code.decode())
예제 #24
0
    def __init__(self,
                 n_trials: int = 4,
                 max_rep: int = 2,
                 max_ins: int = 2,
                 max_mutants: int = 10,
                 bblfsh_address: str = "0.0.0.0:9432"):
        """
        Initialize mutator.

        :param n_trials: number of trials for mutation.
        :param max_rep: max number of repeats.
        :param max_ins: max number of insertions.
        :param max_mutants: max number of mutants to collect.
        :param bblfsh_address: address of the bblfsh server the client connects to.
        """
        self.n_trials = n_trials
        self.max_rep = max_rep
        self.max_ins = max_ins
        self.max_mutants = max_mutants
        self.bblfsh_address = bblfsh_address
        # The client is created eagerly, so constructing the mutator needs the bblfsh package.
        self.client = bblfsh.BblfshClient(endpoint=bblfsh_address)
예제 #25
0
 def setUpClass(cls):
     """Set up logging, the parsed base/head fixtures and the analyzer configuration."""
     logging.basicConfig(level=logging.INFO)
     logging.getLogger("IdTyposAnalyzer").setLevel(logging.DEBUG)
     fixtures_dir = Path(__file__).parent
     cls.bblfsh_client = bblfsh.BblfshClient("0.0.0.0:9432")

     def load_fixture(archive_name, parse_name, prefix=b""):
         """Decompress a fixture, prepend `prefix`, parse it and wrap it into a file list."""
         # str() is needed for Python 3.5
         with lzma.open(str(fixtures_dir / archive_name)) as fin:
             contents = prefix + fin.read()
         uast = cls.bblfsh_client.parse(parse_name, contents=contents).uast
         return [
             File(path="test_file.js",
                  content=contents,
                  uast=uast,
                  language="Javascript")
         ]

     cls.base_files = load_fixture("test_base_file.js.xz", "test_base_file.js")
     # The head version starts with an extra declaration of the misspelled identifier.
     cls.head_files = load_fixture("test_head_file.js.xz", "test_head_file.js",
                                   prefix=b"var print_tipe = 0;\n")
     cls.ptr = ReferencePointer("someurl", "someref", "somecommit")
     analyze_section = {
         "filepath": cls.base_files[0].path,
         "wrong_id": "print_tipe",
         "line": 0
     }
     cls.config = {
         "model": MODEL_PATH,
         "confidence_threshold": 0.0,
         "n_candidates": 3,
         "check_all_identifiers": True,
         "analyze": analyze_section
     }
예제 #26
0
    def main_per_repository(self, repo_name: str) -> None:
        """
        Train on one repository and save the collected stats, unless they already exist.

        :param repo_name: Name of the repository; also the sub-directory of the stats file.
        """
        stats_path = DEFAULT_STATS_DIR / repo_name / 'stats.db'
        stats_path.parent.mkdir(parents=True, exist_ok=True)
        if stats_path.exists():
            # A previous run already produced the stats file.
            logger.info('Stats already exist for %s' % repo_name)
            return

        stats = Stats()
        client = bblfsh.BblfshClient(self._bblfshd)
        repo = get_repository(repo_name)
        filters = [
            VendorFilter(),
            LanguageFilter(['Go']),
            MaxSizeFilter(max_size=10 * 1024),
        ]
        trainer = GitRepositoryTrainer(repo=repo,
                                       repo_name=repo_name,
                                       client=client,
                                       stats=stats,
                                       filters=filters)
        trainer.train_all()
        logger.info('saving stats: %s' % stats_path)
        stats.save(filename=stats_path)
        logger.info('saved stats: %s' % stats_path)
예제 #27
0
 def analyze_file(path: str):
     """
     Parse one file with bblfsh and pass its UAST to `analyze_uast`.

     Returns immediately once `errors` has been set. The first parsed file fixes
     `language`; later files reported with a different language are skipped with a
     warning. Any exception is logged and sets `errors`.

     :param path: Path of the source file to parse and analyze.
     """
     nonlocal errors
     if errors:
         return
     try:
         try:
             # Reuse the client already stored on `clients`, if there is one.
             client = clients.client
         except AttributeError:
             # No client stored yet: create one and remember it on `clients`.
             # NOTE(review): `clients` is presumably thread-local storage - confirm.
             client = bblfsh.BblfshClient(args.bblfsh)
             clients.client = client
         response = client.parse(path)
         nonlocal language
         if not language:
             # The first successfully parsed file decides the expected language.
             language = response.language
         elif language != response.language:
             log.warning("dropped %s - language mismatch %s != %s", path,
                         language, response.language)
             return
         analyze_uast(path, response.uast, roles, reserved)
         # Progress advances only for files that were analyzed successfully.
         progress.update(1)
     except:  # noqa: E722
         # Deliberately broad: any failure is logged and stops further processing.
         log.exception("Parsing %s", path)
         errors = True
예제 #28
0
def main(data, output):
    """
    Parse every ``*.py`` file found under `data` and pass the non-empty UASTs to `process_uasts`.

    :param data: Root passed to `recursive_glob`.
    :param output: Not used by the active code; only the disabled role-saving step refers to it.
    """
    client = bblfsh.BblfshClient("0.0.0.0:9432")

    uasts = []
    for source_path in recursive_glob(data, '*.py'):
        print("Processing file: {}".format(source_path))
        uast = client.parse(source_path).uast
        if len(uast.children) == 0:
            # Skip UASTs without children.
            continue
        uasts.append(uast)

    # The counts are not consumed yet: printing statistics, clustering nodes and saving
    # roles are disabled in this version.
    rules_count, nodes_count = process_uasts(uasts)
예제 #29
0
import bblfsh
import sys
import os

from bblfsh import filter as filter_uast
from java_sonar_rule_RSPEC_1214 import rule_chk as rule_chk
from java_while_rule import rule_chk as rule_chk_while
from java_sonar_rule_RSPEC_1764 import rule_chk as rule_chk_1764

if __name__ == "__main__":
    # Connect to the Babelfish server and parse the file given as the first CLI argument.
    response = bblfsh.BblfshClient("0.0.0.0:9432").parse(sys.argv[1])

    # A non-zero status means the parse failed; abort with the reported errors.
    if response.status != 0:
        raise Exception('Some error happened: ' + str(response.errors))

    # Only the "while" rule is run on the UAST.
    print(rule_chk_while(response.uast))
예제 #30
0
 def __init__(self, path):
     """Store `path` and create a bblfsh client for `self.server_endpoint`."""
     self.path = path
     endpoint = self.server_endpoint
     self.client = bblfsh.BblfshClient(endpoint)