Ejemplo n.º 1
0
def main() -> None:
    """
    Build the shared tree-sitter library from the vendored grammars.

    The C, C#, C++, Java and Python grammars are looked up below the
    directory returned by ``get_tree_sitter_dir()`` and the result is written
    to the location returned by ``get_tree_sitter_so()``.

    :return: None.
    """
    root_dir = get_tree_sitter_dir()
    # One entry per grammar repository, in the order they are handed over.
    grammar_names = ("c", "c-sharp", "cpp", "java", "python")
    grammar_dirs = [
        os.path.join(root_dir, "vendor/tree-sitter-" + name)
        for name in grammar_names
    ]
    output_library = get_tree_sitter_so()
    Language.build_library(output_library, grammar_dirs)
    print("Parser successfully initialized.")
Ejemplo n.º 2
0
def build_so(lib_dir, lang):
    """Build the Tree-Sitter ``.so`` file for one language from its zip archive.

    The archive ``<lang>.zip`` inside ``lib_dir`` is extracted in place and the
    extracted ``tree-sitter-<lang>-master`` directory is passed to
    ``Language.build_library`` to produce ``<lang>.so`` in the same directory.
    When the archive is missing, nothing is extracted or built.

    :param lib_dir: directory holding the archive; ``~`` is expanded for file
        access, while log messages show the path as it was given.
    :param lang: language name used to derive the archive, grammar directory
        and library file names.
    :return: None.
    """
    _lib_dir = os.path.expanduser(lib_dir)
    archive_name = '{}.zip'.format(lang)
    lib_file = os.path.join(lib_dir, archive_name)
    _lib_file = os.path.join(_lib_dir, archive_name)

    if not os.path.exists(_lib_file):
        # The check is on the zip archive, so the message says so; it does
        # not tell us anything about an existing .so file.
        LOGGER.info(
            'Tree-Sitter archive {} does not exist, nothing to build.'.format(
                lib_file))
        return

    LOGGER.info(
        'Tree-Sitter archive {} found, decompressing and compiling.'.format(
            lib_file))
    # decompress Tree-Sitter library
    with zipfile.ZipFile(_lib_file, 'r') as zip_file:
        zip_file.extractall(path=_lib_dir)

    library_name = '{}.so'.format(lang)
    so_file = os.path.join(lib_dir, library_name)
    _so_file = os.path.join(_lib_dir, library_name)
    LOGGER.info('Building Tree-Sitter compile file {}'.format(so_file))
    Language.build_library(
        # one *.so file is built per language
        _so_file,
        # grammar directory extracted from the archive
        [os.path.join(_lib_dir, 'tree-sitter-{}-master'.format(lang))],
    )
Ejemplo n.º 3
0
    def __init__(
            self,
            langs: List[str],
            added_nodes: Dict[str, Dict[str, str]],
            skip_node_types: Dict[str, List[str]],
            vendors_path: Path = Path("./vendor"),
    ):
        """
        Build the grammar library for ``langs`` and create the parser.

        :param langs: language names; each one maps to the grammar directory
            ``tree-sitter-<lang>`` below ``vendors_path``.
        :param added_nodes: per-language ``prefix``/``suffix`` strings. The
            dictionary is stored as-is and completed in place with empty
            strings for languages it does not mention.
        :param skip_node_types: per-language node types to skip. Stored as-is
            and completed in place with an empty list for missing languages.
        :param vendors_path: directory containing the grammar repositories.
        """
        super(TreeSitterParser, self).__init__()

        self.added_nodes = added_nodes
        self.skip_node_types = skip_node_types

        grammar_dirs = []
        for lang in langs:
            grammar_dirs.append(vendors_path / f"tree-sitter-{lang}")
            # Fill in defaults only for languages the caller did not configure.
            self.added_nodes.setdefault(lang, {"prefix": "", "suffix": ""})
            self.skip_node_types.setdefault(lang, [])

        # The library always lands in the local `build` directory.
        Language.build_library("build/my-languages.so", grammar_dirs)

        self.parser = Parser()
Ejemplo n.º 4
0
def main() -> None:
    """
    Build the shared tree-sitter library for C, C#, C++ and Java.

    Grammar directories and the output file ``build/langs.so`` are all
    resolved relative to the directory returned by ``get_tree_sitter_dir()``.

    :return: None
    """
    root_dir = get_tree_sitter_dir()
    # Grammar repositories, in the order they are passed to the build.
    grammar_dirs = [
        os.path.join(root_dir, "tree-sitter-" + name)
        for name in ("c", "c-sharp", "cpp", "java")
    ]
    output_library = os.path.join(root_dir, "build/langs.so")
    Language.build_library(output_library, grammar_dirs)
Ejemplo n.º 5
0
def file_parse(path, name):
    """Parse a Python source file and write a DOT graph for it.

    The Python grammar is built into ``../build/my-languages.so``, the file at
    ``path`` is parsed, and the output is written to
    ``result_dot/<name>.dot``. The graph body is produced by ``traverse`` and
    ``write_together``, which both receive the open output file.

    :param path: location of the source file to parse.
    :param name: base name (without extension) of the generated ``.dot`` file.
    :return: None.
    """
    global import_lists

    Language.build_library('../build/my-languages.so', ['../tree-sitter-python'])
    PY_LANGUAGE = Language('../build/my-languages.so', 'python')
    parser = Parser()
    parser.set_language(PY_LANGUAGE)
    code = read_file(str(path))
    encoded_code = bytes(code, "utf8")
    tree = parser.parse(encoded_code)
    root_node = tree.root_node

    Graph = nx.DiGraph()
    # The context manager closes the file even if the traversal raises.
    with open('result_dot/' + str(name) + '.dot', 'w') as f:
        f.write('digraph G{\n')
        f.write('rankdir="LR";\n')
        traverse(root_node, Graph, encoded_code, f)
        # import_lists is module-level state; presumably filled while
        # traversing -- verify against traverse().
        write_together(f, import_lists)
        f.write("}")

    return None
Ejemplo n.º 6
0
    def __init__(self):
        """Build the grammar library if it is missing and create the parser.

        The library ``build/my-languages.so`` is only built when no file
        exists at that path; an existing file is left untouched.
        """
        library_file = 'build/my-languages.so'
        if not os.path.exists(library_file):
            grammar_names = ('c', 'cpp', 'c-sharp', 'rust', 'javascript',
                             'python')
            Language.build_library(
                library_file,
                ['vendor/tree-sitter-' + name for name in grammar_names],
            )

        self.ts = Parser()
        # No tree is available until something has been parsed.
        self.tree = None
Ejemplo n.º 7
0
def build_libraries(languages: List[str], path):
    """Rebuild the tree-sitter library at ``path`` from ``languages``.

    :param languages: grammar locations handed to ``Language.build_library``.
    :param path: output location of the library.
    """
    # Remove a previous build first so that a new library gets created.
    previous_build_exists = os.path.isfile(path)
    if previous_build_exists:
        os.remove(path)
    Language.build_library(path, languages)
Ejemplo n.º 8
0
 def __init__(self, grammar, language="python", parser_library_path='src/tree-sitter/tree-sitter-python', **kwargs):
     """Build the grammar library and set up the parser and node builder.

     :param grammar: grammar object stored on the instance and passed to
         ``NodeBuilder``.
     :param language: name of the language to load from the built library.
     :param parser_library_path: grammar directory to build from.
     :param kwargs: accepted but not used here.
     """
     library_file = '/build/my-languages.so'
     Language.build_library(library_file, [parser_library_path])
     ts_language = Language(library_file, language)

     self.grammar = grammar

     self.TS_parser = Parser()
     self.TS_parser.set_language(ts_language)
     self.node_builder = NodeBuilder(self.grammar)
Ejemplo n.º 9
0
    def __init__(self) -> None:
        """Build the grammar library and create one parser per language.

        Each entry of ``TREE_SITTER_LANGS`` is expected to have its grammar
        checked out at ``vendor/tree-sitter-<lang>``. ``self.parsers`` maps
        every language name to a parser configured for that language.
        """
        # assume submodules exist
        vendor_dirs = [
            "vendor/tree-sitter-%s" % lang for lang in TREE_SITTER_LANGS
        ]
        Language.build_library(BUILD_PATH, vendor_dirs)

        self.parsers = {}
        for lang in TREE_SITTER_LANGS:
            parser = Parser()
            # Load the grammar that belongs to this entry instead of a fixed
            # one. Grammar symbols use underscores where repository names
            # use hyphens (e.g. "c-sharp" -> "c_sharp").
            parser.set_language(Language(BUILD_PATH, lang.replace("-", "_")))
            self.parsers[lang] = parser
Ejemplo n.º 10
0
def main() -> None:
    """
    Download the grammars and build the tree-sitter library from them.

    :return: None.
    """
    download_grammars()
    # Resolve the inputs first, then the output location, then build.
    grammar_dirs = get_grammar_locs()
    output_library = get_tree_sitter_so()
    Language.build_library(output_library, grammar_dirs)
    logging.info("Parser successfully initialized.")
Ejemplo n.º 11
0
def create_parser_builds(path=None):
    """Build the Java grammar and return the loaded language keyed by name.

    The library is written to ``build/my-languages.so`` from the
    ``tree-sitter-java`` directory. ``path`` is accepted but not used.

    :return: dict with the single key ``"java"``.
    """
    library_file = 'build/my-languages.so'
    Language.build_library(library_file, ['tree-sitter-java'])
    return {"java": Language(library_file, 'java')}
Ejemplo n.º 12
0
def main(file):
    """Parse a Java file with tree-sitter and print the result as XML.

    The Java grammar is built next to this module, the content returned by
    ``read_file(file)`` is parsed, and the tree is converted with
    ``to_gumtree_node`` / ``process_node`` into the module-level ``doc``,
    which is then pretty-printed to stdout.

    :param file: location passed to ``read_file``.
    """
    base_dir = os.path.dirname(__file__)
    library_file = os.path.join(base_dir, 'build/my-languages.so')
    java_grammar_dir = os.path.join(base_dir, 'vendor/tree-sitter-java')

    # Configure tree-sitter: build the library, then load Java from it.
    Language.build_library(library_file, [java_grammar_dir])
    java_lang = Language(library_file, 'java')

    parser = Parser()
    parser.set_language(java_lang)
    tree_sitter_tree = parser.parse(read_file(file))

    gumtree_ast = to_gumtree_node(tree_sitter_tree.root_node)

    # Document layout: <root> holds an empty <context> tag (present in the
    # test cases) followed by the converted syntax tree.
    root_node = doc.createElement('root')
    context_node = doc.createElement('context')
    doc.appendChild(root_node)
    root_node.appendChild(context_node)
    root_node.appendChild(gumtree_ast)

    # Recursively attach the children of the syntax tree, if any.
    process_node(tree_sitter_tree.root_node, gumtree_ast)

    print(doc.toprettyxml())
Ejemplo n.º 13
0
def main() -> None:
    """
    Build the shared tree-sitter library from all vendored grammars.

    Every grammar lives below ``vendor`` in the directory returned by
    ``get_tree_sitter_dir()``; the TypeScript repository contributes two
    grammars (``typescript`` and ``tsx``). The result is written to the
    location returned by ``get_tree_sitter_so()``.

    :return: None.
    """
    root_dir = get_tree_sitter_dir()
    # Path components below "vendor", in the order handed to the build.
    grammar_parts = (
        ("tree-sitter-javascript",),
        ("tree-sitter-python",),
        ("tree-sitter-java",),
        ("tree-sitter-go",),
        ("tree-sitter-cpp",),
        ("tree-sitter-ruby",),
        ("tree-sitter-typescript", "typescript"),
        ("tree-sitter-typescript", "tsx"),
        ("tree-sitter-php",),
        ("tree-sitter-c-sharp",),
        ("tree-sitter-c",),
        ("tree-sitter-bash",),
        ("tree-sitter-rust",),
    )
    grammar_dirs = [
        os.path.join(root_dir, "vendor", *parts) for parts in grammar_parts
    ]
    output_library = get_tree_sitter_so()
    Language.build_library(output_library, grammar_dirs)
    print("Parser successfully initialized.")
Ejemplo n.º 14
0
Archivo: ts.py Proyecto: forest520/csn
    def __init__(self,
                 code,
                 language='python',
                 tree_style='AST',
                 path_style='U2D'):
        """Parse ``code`` and store the traversal result on the instance.

        :param code: source text to parse.
        :param language: grammar name to load from ``scripts/build/csn.so``
            (one of go, java, javascript, php, python, ruby).
        :param tree_style: one of AST | SPT || HST | HPT.
        :param path_style: one of L2L | UD | U2D.
        """
        self.tree_style = tree_style
        self.path_style = path_style

        # The library is not built here. It can be produced with
        # Language.build_library(csn_so, [...]) from the vendor/tree-sitter-*
        # directories for go, java, javascript, php, python and ruby.
        csn_so = 'scripts/build/csn.so'

        ts_parser = Parser()
        ts_parser.set_language(Language(csn_so, language))
        tree = ts_parser.parse(code.encode())
        self.root, self.terminals = self.traverse(tree, code.split('\n'))

        # Debug output is always on: dump the source and its s-expression.
        self.debug = True
        if self.debug:
            print(f'{language}{"@" * 9}code\n{code}')
            print(f'{language}{"@" * 9}sexp\n{tree.root_node.sexp()}')
Ejemplo n.º 15
0
    def run(self, tmp_dir, params):
        """Parse every file in ``params['files']`` with the Verilog grammar.

        :param tmp_dir: not used by this method.
        :param params: mapping whose ``'files'`` entry lists the source paths.
        :return: tuple ``(log, ret, ru_utime, ru_stime, ru_maxrss)`` where
            ``ret`` is 1 if setup failed, a file could not be opened or
            parsed, or ``self.walk`` reported a problem, and 0 otherwise.
        """
        self.ret = 0
        self.log = ''

        parser = None
        try:
            lib = self.find_lib()

            lang = Language(lib, 'verilog')

            parser = Parser()
            parser.set_language(lang)
        except Exception as e:
            self.log += f'{e}\n'
            self.ret = 1
            parser = None

        sources = params['files']
        if parser is None:
            # Without a parser no file can be processed; the setup error is
            # already in the log, so skip the loop rather than report an
            # unrelated NameError for every file.
            sources = []

        for src in sources:
            try:
                f = open(src, 'rb')
            except IOError:
                self.ret = 1
                self.log_error(src, '', '', 'failed to open file')
                continue

            try:
                # Close the file as soon as its content has been parsed.
                with f:
                    tree = parser.parse(f.read())
                if self.walk(tree.root_node, src):
                    self.ret = 1
            except Exception as e:
                self.log_error(src, '', '', 'unknown error: ' + str(e))
                self.ret = 1

        usage = resource.getrusage(resource.RUSAGE_SELF)
        profiling_data = (usage.ru_utime, usage.ru_stime, usage.ru_maxrss)

        return (self.log, self.ret) + profiling_data
Ejemplo n.º 16
0
def corpus_dataflow_match(references, candidates, lang):
    """Return the share of reference data-flow items found in the candidates.

    For every candidate and each of its references, both snippets are
    stripped of comments (best effort), their data flow is extracted and
    normalized, and every reference item that also occurs in the candidate
    is counted once.

    :param references: one collection of reference snippets per candidate,
        indexed in parallel with ``candidates``.
    :param candidates: candidate snippets.
    :param lang: grammar name to load and key into ``dfg_function``.
    :return: ``match_count / total_count``, or ``0.0`` when no reference
        data-flow item was extracted at all.
    """
    LANGUAGE = Language('parser/my-languages.so', lang)
    ts_parser = Parser()
    ts_parser.set_language(LANGUAGE)
    parser = [ts_parser, dfg_function[lang]]
    match_count = 0
    total_count = 0

    for i, candidate in enumerate(candidates):
        references_sample = references[i]
        for reference in references_sample:
            # Comment stripping is best effort: on failure the snippet is
            # used unchanged.
            # NOTE(review): 'java' is hard-coded here regardless of `lang`
            # -- confirm that this is intended.
            try:
                candidate = remove_comments_and_docstrings(candidate, 'java')
            except Exception:
                pass
            try:
                reference = remove_comments_and_docstrings(reference, 'java')
            except Exception:
                pass

            cand_dfg = get_data_flow(candidate, parser)
            ref_dfg = get_data_flow(reference, parser)

            normalized_cand_dfg = normalize_dataflow(cand_dfg)
            normalized_ref_dfg = normalize_dataflow(ref_dfg)

            total_count += len(normalized_ref_dfg)
            for dataflow in normalized_ref_dfg:
                if dataflow in normalized_cand_dfg:
                    match_count += 1
                    # Each candidate item may satisfy one reference item only.
                    normalized_cand_dfg.remove(dataflow)

    if total_count == 0:
        # Nothing to compare against; avoid dividing by zero.
        return 0.0
    return match_count / total_count
Ejemplo n.º 17
0
 def __init__(self, language_type, encoding='utf-8'):
     """Create a parser for ``language_type`` and start with an empty buffer.

     :param language_type: grammar name to load from ``LIB_BIN``.
     :param encoding: encoding name stored on the instance.
     """
     self.language_type = language_type
     self.encoding = encoding

     self.parser = Parser()
     grammar = Language(LIB_BIN, self.language_type)
     self.parser.set_language(grammar)

     # Seed the buffer with a single empty line before any results exist.
     self.UpdateBuffer([""])
     self._res = []
Ejemplo n.º 18
0
def language_installed(language):
    """Return whether ``language`` can be loaded from the built library.

    :param language: grammar name to look up in ``tree_sitter_build``.
    :return: ``False`` when the library file does not exist or loading the
        language from it fails, ``True`` otherwise.
    """
    if not Path(tree_sitter_build).exists():
        return False
    try:
        Language(tree_sitter_build, language)
    except Exception:
        # Any loading failure counts as "not installed". KeyboardInterrupt
        # and SystemExit are deliberately not swallowed.
        return False
    return True
Ejemplo n.º 19
0
def get_parser(language):
    """Return the cached parser for ``language``, creating it on first use.

    Aliases are resolved through ``LANGUAGE_ALIASES`` before the lookup, so
    the cache in ``PARSERS`` is keyed by the resolved name.
    """
    resolved = LANGUAGE_ALIASES.get(language, language)
    if resolved not in PARSERS:
        grammar = Language(tree_sitter_build, resolved)
        new_parser = Parser()
        new_parser.set_language(grammar)
        PARSERS[resolved] = new_parser
        return new_parser
    return PARSERS[resolved]
Ejemplo n.º 20
0
 def __init__(
     self,
     SO_FILE: str,
     LANGUAGE: str,
     to_lower=True,
 ):
     """Create a parser for ``LANGUAGE`` loaded from ``SO_FILE``.

     :param SO_FILE: path of the tree-sitter library.
     :param LANGUAGE: grammar name inside that library.
     :param to_lower: flag stored on the instance as ``to_lower``.
     """
     self.parser = Parser()
     grammar = Language(SO_FILE, LANGUAGE)
     self.parser.set_language(grammar)

     self.LANGUAGE = LANGUAGE
     self.to_lower = to_lower
Ejemplo n.º 21
0
def get_parser(so_path: str = None) -> Parser:
    """Return a parser configured for Java.

    :param so_path: path of the tree-sitter library; ``JAVA_SO_PATH`` is
        used when it is ``None``.
    """
    library_file = JAVA_SO_PATH if so_path is None else so_path
    java_grammar = Language(library_file, 'java')

    java_parser = Parser()
    java_parser.set_language(java_grammar)
    return java_parser
Ejemplo n.º 22
0
def install_parsers(languages=None):
    """Clone the grammars for ``languages`` and rebuild the parser library.

    :param languages: language names to install; all of
        ``supported_languages`` when empty or ``None``.
    :raises ValueError: if a requested language is not supported. This is
        checked before anything is cloned.

    The library itself is always built from every entry of
    ``supported_languages``, not only from the requested ones.
    """
    if not languages:
        languages = supported_languages
    if all(language_installed(lang) for lang in languages):
        print(f"Parsers for languages {languages} already installed.")
        return

    # Validate up front so that a bad name cannot leave a half-done install.
    for lang in languages:
        if lang not in supported_languages:
            raise ValueError(
                f"{lang} not supported. The supported languages are: {', '.join(sorted(supported_languages))}."
            )

    wd = os.getcwd()
    os.chdir(PARSER_DIR)
    try:
        for lang in languages:
            repo = f"tree-sitter-{lang}"
            git_clone(f"https://github.com/tree-sitter/{repo}")
        Language.build_library(tree_sitter_build, [
            str(PARSER_DIR / f"tree-sitter-{lang}")
            for lang in supported_languages
        ])
    finally:
        # Always return to the caller's working directory, also on failure.
        os.chdir(wd)
Ejemplo n.º 23
0
    def build_parser(self):
        """Fetch the grammar repository if needed, build it and load it.

        ``self.LANG_URL`` provides the repository URL and the folder name
        below the ``fast_trees`` package directory. After the build,
        ``self.language`` holds the grammar named ``self.LANG`` and
        ``self.parser`` a parser configured with it.
        """
        url, folder = self.LANG_URL
        repo_dir = Path(fast_trees.__path__[0] + "/" + folder)
        if repo_dir.exists():
            print("Repo already exists, continuing.")
        else:
            print(f"Downloading repo {url} to {repo_dir}.")
            Repo.clone_from(url, repo_dir)

        # repo_dir already starts with the package directory, so the library
        # path is derived from it directly instead of prefixing the package
        # directory a second time.
        build_dir = str(repo_dir / "build" / "my-languages.so")
        Language.build_library(
            # Store the library in the repository's `build` directory
            build_dir,
            # Include one or more languages
            [repo_dir],
        )
        self.language = Language(build_dir, self.LANG)
        self.parser = Parser()
        self.parser.set_language(self.language)
Ejemplo n.º 24
0
    def parse_full(self, lang: str, code: str) -> Tuple[List[str], Set[str]]:
        """Parse ``code`` as ``lang`` and return tokens and special tokens.

        The code is wrapped in the prefix and suffix configured for ``lang``
        (separated by single spaces) before parsing. The result is the pair
        produced by ``breadth_first_path``.
        """
        self.parser.set_language(Language("build/my-languages.so", lang))

        wrapping = self.added_nodes[lang]
        code = f"{wrapping['prefix']} {code} {wrapping['suffix']}"

        syntax_tree = self.parser.parse(bytes(code, "utf8"))
        tokens, special_tokens = self.breadth_first_path(
            lang,
            code,
            syntax_tree.walk(),
            skip_node_types=self.skip_node_types[lang],
        )
        return tokens, special_tokens
Ejemplo n.º 25
0
def make_tree_sitter_lib(args, lang_repo_list):
    """
    Build the library from the given repositories, replacing an old build.

    The output file is ``args.lib_name`` inside the directory returned by
    ``lib_path(args)``. Returns whatever ``Language.build_library`` returns.
    """
    target = f"{lib_path(args)}/{args.lib_name}"

    # Start from a clean slate: drop the previous library if there is one.
    if Path(target).exists():
        os.remove(target)

    return Language.build_library(target, lang_repo_list)
Ejemplo n.º 26
0
 def _get_language_library(self):
     """Return the path of the language library, building it when missing.

     The build is guarded by ``self.ts_lang_cache_lock``, which is acquired
     with a 300 second timeout and released once the library path has been
     determined.

     :return: path object of ``language.so`` in the language cache directory.
     :raises filelock.Timeout: if the lock cannot be acquired in time.
     """
     try:
         self.ts_lang_cache_lock.acquire(timeout=300)
     except filelock.Timeout:
         log.error(f"Failed to acquire lock on TSABL {self}")
         log.debug(f"lock object is {self.ts_lang_cache_lock}")
         raise

     # From here on the lock is held, so it is safe to release it on exit.
     try:
         lib = self._get_language_cache_dir() / "language.so"
         # The return value is not needed; the call is kept in case it has
         # side effects (presumably it makes the repository available --
         # TODO confirm).
         self._get_language_repo()
         repodir = self._get_language_repo_path()
         if not lib.exists():
             log.warning(
                 f"building library for {self}, this could take a while...")
             start = time.time()
             Language.build_library(str(lib.resolve()), [repodir])
             log.debug(
                 f"library build of {self} completed after {round(time.time() - start)} seconds"
             )
         return lib
     finally:
         self.ts_lang_cache_lock.release()
Ejemplo n.º 27
0
    def parse(self, lang: str, code: str, max_tokens: Optional[int] = None) -> List[str]:
        """Parse ``code`` as ``lang`` and return its tokens.

        The code is wrapped in the prefix and suffix configured for ``lang``
        (separated by single spaces) before parsing. ``max_tokens`` is passed
        through to ``breadth_first_path_light``, whose result is returned.
        """
        self.parser.set_language(Language("build/my-languages.so", lang))

        wrapping = self.added_nodes[lang]
        code = f"{wrapping['prefix']} {code} {wrapping['suffix']}"

        syntax_tree = self.parser.parse(bytes(code, "utf8"))
        return self.breadth_first_path_light(
            lang,
            code,
            syntax_tree.walk(),
            skip_node_types=self.skip_node_types[lang],
            max_tokens=max_tokens,
        )
Ejemplo n.º 28
0
    def __init__(self,
                 so_file: str,
                 language: str,
                 operators_file: str = None):
        """Create the parser for ``language`` and load the operators table.

        :param so_file: path of the tree-sitter library.
        :param language: grammar name inside that library.
        :param operators_file: JSON file with the operators; defaults to
            ``operators.json`` next to this module.
        """
        self.parser = Parser()
        self.parser.set_language(Language(so_file, language))
        self.language = language

        operators_path = operators_file
        if operators_path is None:
            module_dir = os.path.dirname(__file__)
            operators_path = os.path.join(module_dir, 'operators.json')
        with open(operators_path, 'r') as handle:
            self.operators = ujson.load(handle)
Ejemplo n.º 29
0
def corpus_syntax_match(references, candidates, lang):
    """Return the share of reference sub-trees that occur in the candidates.

    For every candidate and each of its references, both snippets are
    stripped of comments (best effort) and parsed. A reference sub-tree
    counts as matched when its s-expression also occurs among the
    candidate's sub-trees.

    :param references: one collection of reference snippets per candidate,
        indexed in parallel with ``candidates``.
    :param candidates: candidate snippets.
    :param lang: grammar name to load from ``parser/my-languages.so``.
    :return: ``match_count / total_count``, or ``0.0`` when there was no
        reference sub-tree to compare against.
    """

    def get_all_sub_trees(root_node):
        """Collect the s-expressions of the root and all non-leaf nodes."""
        pending = [root_node]
        sexps = []
        while pending:
            node = pending.pop()
            sexps.append(node.sexp())
            # Leaves are skipped: only nodes with children form sub-trees.
            pending.extend(child for child in node.children
                           if len(child.children) != 0)
        return sexps

    language = Language('parser/my-languages.so', lang)
    parser = Parser()
    parser.set_language(language)
    match_count = 0
    total_count = 0

    for i, candidate in enumerate(candidates):
        references_sample = references[i]
        for reference in references_sample:
            # Comment stripping is best effort: on failure the snippet is
            # used unchanged.
            # NOTE(review): 'java' is hard-coded here regardless of `lang`
            # -- confirm that this is intended.
            try:
                candidate = remove_comments_and_docstrings(candidate, 'java')
            except Exception:
                pass
            try:
                reference = remove_comments_and_docstrings(reference, 'java')
            except Exception:
                pass

            candidate_tree = parser.parse(bytes(candidate, 'utf8')).root_node
            reference_tree = parser.parse(bytes(reference, 'utf8')).root_node

            # A set gives constant-time membership tests.
            cand_sexps = set(get_all_sub_trees(candidate_tree))
            ref_sexps = get_all_sub_trees(reference_tree)

            match_count += sum(1 for sexp in ref_sexps if sexp in cand_sexps)
            total_count += len(ref_sexps)

    if total_count == 0:
        # Nothing to compare against; avoid dividing by zero.
        return 0.0
    return match_count / total_count
Ejemplo n.º 30
0
    def __init__(self,
                 language: str,
                 query_class_name: str,
                 query_file_path: str,
                 library_loc: str = None):
        """Load the grammar, create the parser and read the query class.

        :param language: grammar name inside the parser library.
        :param query_class_name: key selected from the loaded query file.
        :param query_file_path: file passed to ``Query.fromFile``.
        :param library_loc: path of the parser library; the ``TS_LIB_PATH``
            environment variable is used when this is ``None``.
        :raises ParserLibraryNotFoundError: if no library path is available
            or the path does not point to an existing file.
        """
        env_library = os.getenv("TS_LIB_PATH")
        if library_loc is None and env_library is not None:
            library_loc = env_library

        if not library_loc:
            raise ParserLibraryNotFoundError(
                "Parser library path is 'None'. Please either set up the environment or call the constructor with the path"
            )

        library_file = Path(library_loc)
        if not (library_file.exists() and library_file.is_file()):
            raise ParserLibraryNotFoundError(
                f"Parser library '{library_loc}' not found. Did you set up the environement variables?"
            )

        self.language = Language(library_loc, language)
        self.parser = Parser()
        self.parser.set_language(self.language)
        self.qclass = Query.fromFile(query_file_path)
        self.QUERIES = self.qclass[query_class_name]