def main() -> None:
    """
    Build the shared tree-sitter library from the vendored grammars.

    The C, C#, C++, Java and Python grammar directories are resolved below the
    directory returned by ``get_tree_sitter_dir()`` and compiled into the
    shared object whose path is returned by ``get_tree_sitter_so()``.

    :return: None.
    """
    # all grammar checkouts are addressed relative to the tree-sitter root
    root = get_tree_sitter_dir()
    grammar_dirs = [
        os.path.join(root, relative_dir)
        for relative_dir in (
            "vendor/tree-sitter-c",
            "vendor/tree-sitter-c-sharp",
            "vendor/tree-sitter-cpp",
            "vendor/tree-sitter-java",
            "vendor/tree-sitter-python",
        )
    ]

    # one shared object holds every compiled grammar
    library_path = get_tree_sitter_so()
    Language.build_library(library_path, grammar_dirs)

    print("Parser successfully initialized.")
def __init__(
    self,
    langs: List[str],
    added_nodes: Dict[str, Dict[str, str]],
    skip_node_types: Dict[str, List[str]],
    vendors_path: Path = Path("./vendor"),
):
    """
    Build the tree-sitter library for ``langs`` and create a parser.

    :param langs: language names; each one is expected to have a grammar
        checkout at ``vendors_path / "tree-sitter-<lang>"``.
    :param added_nodes: per-language mapping with ``"prefix"`` and
        ``"suffix"`` entries. Languages missing from it get empty strings.
    :param skip_node_types: per-language list of node types. Languages
        missing from it get an empty list.
    :param vendors_path: directory that contains the grammar checkouts.
    """
    super(TreeSitterParser, self).__init__()

    # Work on shallow copies: the per-language defaults filled in below must
    # not leak into the dictionaries owned by the caller.
    self.added_nodes = dict(added_nodes)
    self.skip_node_types = dict(skip_node_types)

    vendors = []
    for lang in langs:
        vendors.append(vendors_path / f"tree-sitter-{lang}")
        self.added_nodes.setdefault(lang, {"prefix": "", "suffix": ""})
        self.skip_node_types.setdefault(lang, [])

    # Compile every requested grammar into a single shared library.
    Language.build_library("build/my-languages.so", vendors)
    self.parser = Parser()
def file_parse(path, name):
    """
    Parse a Python source file and write its graph to a DOT file.

    The Python grammar is compiled into ``../build/my-languages.so``, the file
    at ``path`` is parsed, and the result of ``traverse`` plus the collected
    ``import_lists`` are written to ``result_dot/<name>.dot``.

    :param path: location of the source file to parse.
    :param name: base name (without extension) of the DOT file to create.
    :return: None.
    """
    global import_lists

    Language.build_library('../build/my-languages.so', ['../tree-sitter-python'])
    PY_LANGUAGE = Language('../build/my-languages.so', 'python')
    parser = Parser()
    parser.set_language(PY_LANGUAGE)

    code = read_file(str(path))
    encoded_code = bytes(code, "utf8")
    tree = parser.parse(encoded_code)
    root_node = tree.root_node
    Graph = nx.DiGraph()

    # The context manager closes the file even when traversal raises.
    with open('result_dot/' + str(name) + '.dot', 'w') as f:
        f.write('digraph G{\n')
        f.write('rankdir="LR";\n')
        traverse(root_node, Graph, encoded_code, f)
        write_together(f, import_lists)
        f.write("}")
    return None
def main() -> None:
    """
    Build the shared tree-sitter library for C, C#, C++ and Java.

    Grammar directories and the output library (``build/langs.so``) are all
    resolved below the directory returned by ``get_tree_sitter_dir()``.

    :return: None
    """
    root = get_tree_sitter_dir()

    # grammar checkouts sit directly below the tree-sitter root
    grammar_dirs = [
        os.path.join(root, grammar_name)
        for grammar_name in (
            "tree-sitter-c",
            "tree-sitter-c-sharp",
            "tree-sitter-cpp",
            "tree-sitter-java",
        )
    ]

    # the compiled library is stored next to them
    library_path = os.path.join(root, "build/langs.so")
    Language.build_library(library_path, grammar_dirs)
def build_so(lib_dir, lang):
    """
    Build the Tree-Sitter shared object for ``lang`` from its zipped grammar.

    Looks for ``<lang>.zip`` in ``lib_dir`` (with ``~`` expanded). When the
    archive is present it is extracted in place and the extracted
    ``tree-sitter-<lang>-master`` directory is compiled into ``<lang>.so`` in
    the same directory. When the archive is missing nothing is built.

    :param lib_dir: directory holding the grammar archive; may start with ``~``.
    :param lang: language name used for the archive, grammar directory and
        shared object names.
    """
    # Paths with the leading underscore are expanded and used for file
    # access; the unexpanded variants are only used in log messages.
    _lib_dir = os.path.expanduser(lib_dir)
    lib_file = os.path.join(lib_dir, '{}.zip'.format(lang))
    _lib_file = os.path.join(_lib_dir, '{}.zip'.format(lang))

    if not os.path.exists(_lib_file):
        LOGGER.info(
            'Tree-Sitter grammar archive {} does not exist, skip building.'.format(
                lib_file))
        return

    LOGGER.info(
        'Tree-Sitter grammar archive {} found, extracting and compiling.'.format(
            lib_file))
    # decompress Tree-Sitter library
    with zipfile.ZipFile(_lib_file, 'r') as zip_file:
        zip_file.extractall(path=_lib_dir)

    so_file = os.path.join(lib_dir, '{}.so'.format(lang))
    _so_file = os.path.join(_lib_dir, '{}.so'.format(lang))
    LOGGER.info('Building Tree-Sitter compile file {}'.format(so_file))
    Language.build_library(
        # your language parser file, we recommend building one *.so file per language
        _so_file,
        # Include one or more languages
        [os.path.join(_lib_dir, 'tree-sitter-{}-master'.format(lang))],
    )
def __init__(self) -> None:
    """
    Build the shared library for ``TREE_SITTER_LANGS`` and create one parser
    per language, stored in ``self.parsers`` keyed by language name.
    """
    # assume submodules exist
    vendor_dirs = ["vendor/tree-sitter-%s" % lang for lang in TREE_SITTER_LANGS]
    Language.build_library(BUILD_PATH, vendor_dirs)

    self.parsers = {}
    for lang in TREE_SITTER_LANGS:
        parser = Parser()
        # Bind the parser to its own language instead of a fixed one. Grammar
        # repositories use hyphens (e.g. "c-sharp") while the exported symbol
        # uses underscores (e.g. "c_sharp").
        parser.set_language(Language(BUILD_PATH, lang.replace("-", "_")))
        self.parsers[lang] = parser
def __init__(self, grammar, language="python",
             parser_library_path='src/tree-sitter/tree-sitter-python',
             **kwargs):
    """
    Compile the grammar at ``parser_library_path`` and set up the parser.

    :param grammar: grammar object, stored on the instance and handed to
        ``NodeBuilder``.
    :param language: name of the language to load from the compiled library.
    :param parser_library_path: directory of the tree-sitter grammar to build.
    :param kwargs: accepted but not used here.
    """
    library_file = '/build/my-languages.so'
    Language.build_library(library_file, [parser_library_path])
    loaded_language = Language(library_file, language)

    self.grammar = grammar

    ts_parser = Parser()
    self.TS_parser = ts_parser
    ts_parser.set_language(loaded_language)

    self.node_builder = NodeBuilder(self.grammar)
def main() -> None:
    """
    Download the grammars and compile them into the shared library.

    :return: None.
    """
    download_grammars()

    # resolve the inputs first, then the output location, then build
    grammar_dirs = get_grammar_locs()
    library_path = get_tree_sitter_so()
    Language.build_library(library_path, grammar_dirs)

    logging.info("Parser successfully initialized.")
def build_libraries(languages: List[str], path):
    """
    Compile the given grammar directories into a fresh library at ``path``.

    :param languages: grammar directories handed to ``Language.build_library``.
    :param path: output location of the shared library.
    """
    # Delete an existing library file first so that a new one is always built.
    stale_library_present = os.path.isfile(path)
    if stale_library_present:
        os.remove(path)

    Language.build_library(path, languages)
def __init__(self):
    """
    Make sure ``build/my-languages.so`` exists, then create the parser.

    The library is only compiled when the file is missing. ``self.tree``
    starts out as ``None``.
    """
    library_file = 'build/my-languages.so'
    library_missing = not os.path.exists(library_file)
    if library_missing:
        grammar_dirs = [
            'vendor/tree-sitter-c',
            'vendor/tree-sitter-cpp',
            'vendor/tree-sitter-c-sharp',
            'vendor/tree-sitter-rust',
            'vendor/tree-sitter-javascript',
            'vendor/tree-sitter-python',
        ]
        Language.build_library(library_file, grammar_dirs)

    self.ts = Parser()
    self.tree = None
def create_parser_builds(path=None):
    """
    Compile the Java grammar and return the loaded language.

    :param path: accepted but not used by this function.
    :return: dict mapping ``"java"`` to the loaded ``Language`` object.
    """
    library_file = 'build/my-languages.so'
    grammar_dirs = ['tree-sitter-java']
    Language.build_library(library_file, grammar_dirs)

    languages = {}
    languages["java"] = Language(library_file, 'java')
    return languages
def main(file):
    """
    Parse ``file`` as Java with tree-sitter and print the result as XML.

    The Java grammar next to this module is compiled, the file content is
    parsed, the tree is converted with ``to_gumtree_node`` / ``process_node``
    and the module-level ``doc`` is pretty-printed to stdout.

    :param file: path handed to ``read_file``.
    """
    here = os.path.dirname(__file__)

    # Configure the tree-sitter parsing tool: only the Java grammar is built.
    library_file = os.path.join(here, 'build/my-languages.so')
    java_grammar_dir = os.path.join(here, 'vendor/tree-sitter-java')
    Language.build_library(library_file, [java_grammar_dir])
    java_lang = Language(os.path.join(here, 'build/my-languages.so'), 'java')

    # Parse the input file.
    parser = Parser()
    parser.set_language(java_lang)
    tree_sitter_tree = parser.parse(read_file(file))
    gumtree_ast = to_gumtree_node(tree_sitter_tree.root_node)

    # Everything lives inside <root>; the empty <context> tag mirrors the
    # expected output of the test cases.
    root_node = doc.createElement('root')
    context_node = doc.createElement('context')
    doc.appendChild(root_node)
    root_node.appendChild(context_node)
    # The converted code structure follows the <context> tag.
    root_node.appendChild(gumtree_ast)

    # Recursively add child nodes (if any).
    process_node(tree_sitter_tree.root_node, gumtree_ast)

    print(doc.toprettyxml())
def main() -> None:
    """
    Build the shared tree-sitter library from all vendored grammars.

    Each grammar directory is resolved below ``get_tree_sitter_dir()`` and the
    result is compiled into the shared object returned by
    ``get_tree_sitter_so()``.

    :return: None.
    """
    root = get_tree_sitter_dir()

    # Path components of each grammar, in the order they are handed to the
    # build. The TypeScript repository ships two grammars in sub-directories.
    grammar_parts = (
        ("vendor", "tree-sitter-javascript"),
        ("vendor", "tree-sitter-python"),
        ("vendor", "tree-sitter-java"),
        ("vendor", "tree-sitter-go"),
        ("vendor", "tree-sitter-cpp"),
        ("vendor", "tree-sitter-ruby"),
        ("vendor", "tree-sitter-typescript", "typescript"),
        ("vendor", "tree-sitter-typescript", "tsx"),
        ("vendor", "tree-sitter-php"),
        ("vendor", "tree-sitter-c-sharp"),
        ("vendor", "tree-sitter-c"),
        ("vendor", "tree-sitter-bash"),
        ("vendor", "tree-sitter-rust"),
    )
    grammar_dirs = [os.path.join(root, *parts) for parts in grammar_parts]

    # compile everything into one shared object
    library_path = get_tree_sitter_so()
    Language.build_library(library_path, grammar_dirs)

    print("Parser successfully initialized.")
def install_parsers(languages=None):
    """
    Clone the requested tree-sitter grammars and build the parser library.

    :param languages: iterable of language names; when empty or ``None`` all
        of ``supported_languages`` are installed.
    :raises ValueError: if a requested language is not in
        ``supported_languages``.
    """
    if not languages:
        languages = supported_languages
    if all(language_installed(lang) for lang in languages):
        print(f"Parsers for languages {languages} already installed.")
        return

    # Validate up front so that an unsupported request fails before anything
    # has been cloned.
    for lang in languages:
        if lang not in supported_languages:
            raise ValueError(
                f"{lang} not supported. The supported languages are: {', '.join(sorted(supported_languages))}."
            )

    wd = os.getcwd()
    os.chdir(PARSER_DIR)
    try:
        for lang in languages:
            repo = f"tree-sitter-{lang}"
            git_clone(f"https://github.com/tree-sitter/{repo}")
        # NOTE(review): the library is built from every supported language,
        # not only the requested ones - confirm that this is intended.
        Language.build_library(tree_sitter_build, [
            str(PARSER_DIR / f"tree-sitter-{name}")
            for name in supported_languages
        ])
    finally:
        # Restore the caller's working directory even if cloning or
        # building fails.
        os.chdir(wd)
def build_parser(self):
    """
    Clone the grammar repository if needed, compile it and create the parser.

    ``self.LANG_URL`` provides the clone URL and the folder name below the
    ``fast_trees`` package directory; ``self.LANG`` is the language name to
    load from the compiled library. Sets ``self.language`` and
    ``self.parser``.
    """
    url, folder = self.LANG_URL
    repo_dir = Path(fast_trees.__path__[0] + "/" + folder)

    if repo_dir.exists():
        print("Repo already exists, continuing.")
    else:
        print(f"Downloading repo {url} to {repo_dir}.")
        Repo.clone_from(url, repo_dir)

    # `repo_dir` already starts with the package directory, so the library
    # path is derived from it directly instead of prefixing the package
    # directory a second time.
    build_dir = str(repo_dir / "build" / "my-languages.so")
    Language.build_library(
        # Store the library in the `build` directory of the checkout
        build_dir,
        # Include one or more languages
        [str(repo_dir)],
    )

    self.language = Language(build_dir, self.LANG)
    self.parser = Parser()
    self.parser.set_language(self.language)
def _get_language_library(self):
    """
    Return the path of the compiled language library, building it if missing.

    The cache lock is held while the library is checked and built.

    :return: path object of ``language.so`` inside the language cache dir.
    :raises filelock.Timeout: if the cache lock cannot be acquired within
        300 seconds.
    """
    # Acquire outside of the try/finally below: the lock must only be
    # released when it has actually been obtained.
    try:
        self.ts_lang_cache_lock.acquire(timeout=300)
    except filelock.Timeout:
        log.error(f"Failed to acquire lock on TSABL {self}")
        log.debug(f"lock object is {self.ts_lang_cache_lock}")
        raise

    try:
        lib = self._get_language_cache_dir() / "language.so"
        # The return value is not needed here; the call is kept in case it
        # prepares the repository. NOTE(review): confirm against its definition.
        self._get_language_repo()
        repodir = self._get_language_repo_path()
        if not lib.exists():
            log.warning(
                f"building library for {self}, this could take a while...")
            start = time.time()
            Language.build_library(str(lib.resolve()), [repodir])
            log.debug(
                f"library build of {self} completed after {round(time.time() - start)} seconds"
            )
        return lib
    finally:
        self.ts_lang_cache_lock.release()
def make_tree_sitter_lib(args, lang_repo_list):
    """
    Create the library from the repos.

    An existing library file at the target location is deleted first.

    :param args: object handed to ``lib_path`` and providing ``lib_name``.
    :param lang_repo_list: grammar repositories to compile.
    :return: whatever ``Language.build_library`` returns.
    """
    target = f"{lib_path(args)}/{args.lib_name}"

    already_built = Path(target).exists()
    if already_built:
        os.remove(target)

    return Language.build_library(target, lang_repo_list)
from javim.buffer_change import BufferChangeListener, BufferChangeDispatcher
from javim.util_classes import OffsetChain, ReplaceRangeOffsetChainUpdate, DeleteOffsetChainUpdate, DelayedAction
import javim
import treelib as tl
from tempfile import mkstemp
from os import remove
from os.path import exists
from threading import Lock
from tree_sitter import Language, Parser, Tree

# Compile the Java grammar at import time and create a module-wide parser.
# NOTE(review): the grammar location is an absolute, user-specific path;
# this only works on a machine that has that checkout.
Language.build_library("build/langs.so", ["/home/friese/git/tree-sitter-java"])
JAVA_LANG = Language("build/langs.so", "java")
parser = Parser()
parser.set_language(JAVA_LANG)

# mkstemp() returns (file descriptor, path); only the path is kept.
# NOTE(review): the descriptor is never closed in the code visible here.
_, tree_file = mkstemp(suffix="tree")
tree_lock = Lock()


def tree2file(tree: Tree):
    # NOTE(review): the nesting below was reconstructed from a flattened
    # source and the function appears to continue beyond this excerpt.
    # Start from a clean slate: drop the previous temp file if present.
    if exists(tree_file):
        remove(tree_file)

    def gen_index():
        # Endless generator yielding 1, 2, 3, ...
        i = 1
        while True:
            yield i
            i += 1
    index = gen_index()
# Build the shared library on first use: clone every grammar repository at the
# pinned version into a temporary directory and compile them together.
if not os.path.exists(LIBRARY_DIR):
    logging.warning("TreeSitter has not been compiled. Cloning languages and building...")
    from git import Repo

    # Named `tmp_dir` (not `dir`) so the builtin is not shadowed at module scope.
    with TemporaryDirectory() as tmp_dir:
        # Clone all repos above at the given tag
        repo_dirs = []
        for lang, (url, suffix) in _LANGUAGE_REPOS.items():
            logging.warning(f"Cloning `{lang}`...")
            repo_dir = os.path.join(tmp_dir, lang)
            repo = Repo.clone_from(url, repo_dir)
            repo.git.checkout(TREE_SITTER_LANG_VER)
            # `suffix` selects the grammar directory inside the checkout.
            repo_dirs.append(os.path.join(repo_dir, suffix))

        # Build library by pointing to each repo. This has to happen inside
        # the `with` block, before the temporary checkouts are removed.
        logging.warning("Building Tree-sitter Library...")
        Language.build_library(LIBRARY_DIR, repo_dirs)

# Load every language from the compiled library.
_LANGUAGES = {}
for language in _LANGUAGE_REPOS:
    _LANGUAGES[language] = Language(LIBRARY_DIR, language)

# Add aliases: alternative spellings that map onto an already loaded language.
_ALIASES = {
    "c++": "cpp",
    "c#": "c_sharp",
    "csharp": "c_sharp",
    "js": "javascript",
    "ts": "typescript"
}
for alias, target in _ALIASES.items():
    _LANGUAGES[alias] = _LANGUAGES[target]
from tree_sitter import Language

from processing.utils import tree_sitter_so, tree_sitter_languages

# Compile the grammars listed below into the shared library whose location is
# provided by `tree_sitter_so()`. Each grammar name is resolved to a path by
# `tree_sitter_languages()`.
Language.build_library(
    tree_sitter_so(),
    [
        tree_sitter_languages(grammar_name)
        for grammar_name in (
            'tree-sitter-go',
            'tree-sitter-java',
            'tree-sitter-javascript',
            'tree-sitter-php',
            'tree-sitter-python',
            'tree-sitter-ruby',
        )
    ],
)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from tree_sitter import Language, Parser

# Grammar checkouts, given relative to the current working directory.
_grammar_dirs = [
    'tree-sitter-go',
    'tree-sitter-javascript',
    'tree-sitter-python',
    'tree-sitter-php',
    'tree-sitter-java',
    'tree-sitter-ruby',
    'tree-sitter-c-sharp',
]

# The compiled library is written to the current working directory.
Language.build_library('my-languages.so', _grammar_dirs)
from tree_sitter import Language, Parser

# Grammar checkouts below `vendor/`, relative to the current working directory.
_grammar_dirs = [
    'vendor/tree-sitter-go',
    'vendor/tree-sitter-java',
    'vendor/tree-sitter-javascript',
    'vendor/tree-sitter-php',
    'vendor/tree-sitter-python',
    'vendor/tree-sitter-ruby',
]

# Compile all of them into a single library inside the `build` directory.
Language.build_library('build/csn.so', _grammar_dirs)
return false; } """[1:-1]) subprocess.run(["npm", "install"], cwd=grammar_path, check=True) subprocess.run(["npx", "tree-sitter", "generate"], cwd=grammar_path, check=True) # Following are commented for future reference to expose playground # Remove "--docker" if local environment matches with the container # subprocess.run(["npx", "tree-sitter", "build-wasm", "--docker"], # cwd=grammar_path, check=True) Language.build_library(grammar_path + "/build/wgsl.so", [ grammar_path, ]) WGSL_LANGUAGE = Language(grammar_path + "/build/wgsl.so", "wgsl") parser = Parser() parser.set_language(WGSL_LANGUAGE) error_list = [] for key, value in scanner_components[scanner_example.name()].items(): if "expect-error" in key: continue value = value[:] if "function-scope" in key: value = ["fn function__scope____() {"] + value + ["}"]
# pylint: disable=missing-docstring import re from unittest import TestCase from os import path from tree_sitter import Language, Parser LIB_PATH = path.join("build", "languages.so") Language.build_library( LIB_PATH, [ path.join("tests", "fixtures", "tree-sitter-python"), path.join("tests", "fixtures", "tree-sitter-javascript"), ], ) PYTHON = Language(LIB_PATH, "python") JAVASCRIPT = Language(LIB_PATH, "javascript") class TestParser(TestCase): def test_set_language(self): parser = Parser() parser.set_language(PYTHON) tree = parser.parse(b"def foo():\n bar()") self.assertEqual( tree.root_node.sexp(), trim("""(module (function_definition name: (identifier) parameters: (parameters) body: (block (expression_statement (call function: (identifier)
# Grammar repositories to clone, keyed by language name.
LANGS = {"java" : "https://github.com/tree-sitter/tree-sitter-java.git",
         "c" : "https://github.com/tree-sitter/tree-sitter-c.git"}
# File name of the shared library to build.
LIB = "tslib.so"

##
##
##

from os import chdir
from pathlib import Path
from subprocess import check_output, STDOUT
from tempfile import TemporaryDirectory

from tree_sitter import Language

# Resolve the target to an absolute path before changing directory below.
target = Path(__file__).parent.absolute() / "badass/lang" / LIB

# All clones live in (and are addressed relative to) the `build` directory.
build = Path("build")
build.mkdir(exist_ok=True, parents=True)
chdir(build)

repos = []
for name, repo in LANGS.items() :
    # The directory name is the repository name without the ".git" suffix.
    local = Path(repo).stem
    if not Path(local).exists() :
        print("cloning", repo)
        check_output(["git", "clone", repo, local], stderr=STDOUT)
    repos.append(local)

print("building", target)
# Remove a previous library so that a rebuild is always performed.
if target.exists() :
    target.unlink()

# The build must not live inside an `assert`: assert statements are stripped
# under `python -O`, which would silently skip building the library.
if not Language.build_library(str(target), repos) :
    raise RuntimeError(f"Language.build_library did not build {target}")
import glob

from tree_sitter import Language

# Absolute locations of the grammar checkouts to compile. The two TypeScript
# grammars ('/src/vendor/tree-sitter-typescript/typescript' and
# '/src/vendor/tree-sitter-typescript/tsx') are currently left out.
languages = [
    '/src/vendor/tree-sitter-python',
    '/src/vendor/tree-sitter-javascript',
    '/src/vendor/tree-sitter-go',
    '/src/vendor/tree-sitter-ruby',
    '/src/vendor/tree-sitter-java',
    '/src/vendor/tree-sitter-cpp',
    '/src/vendor/tree-sitter-php',
]

# Compile every grammar into one shared library.
Language.build_library('/src/build/py-tree-sitter-languages.so', languages)
import os

from tree_sitter import Language

from code_search import shared
from code_search.function_parser.utils import get_tree_sitter_languages_lib

# Grammar checkouts, all located below the shared vendor directory.
languages = [
    os.path.join(shared.VENDOR_DIR, grammar_name)
    for grammar_name in (
        'tree-sitter-python',
        'tree-sitter-javascript',
        'tree-sitter-go',
        'tree-sitter-ruby',
        'tree-sitter-java',
        'tree-sitter-php',
    )
]

# Compile every grammar into the library returned by the helper.
Language.build_library(get_tree_sitter_languages_lib(), languages)
# -*- coding: utf-8 -*- # # Copyright © 2019 Stephan Seitz <*****@*****.**> # # Distributed under terms of the GPLv3 license. """ """ from tree_sitter import Language, Parser from queue import SimpleQueue Language.build_library( # Store the library in the `build` directory 'build/my-languages.so', # Include one or more languages [ 'tree-sitter-cpp', ]) CPP_LANGUAGE = Language('build/my-languages.so', 'cpp') parser = Parser() parser.set_language(CPP_LANGUAGE) tree = parser.parse( bytes( """ #include <iostream> #include <cstdlib> auto main( int argc, char** argv ) -> int
import streamlit as st from transformers import AutoTokenizer, AutoModelWithLMHead, SummarizationPipeline import torch from tree_sitter import Language, Parser import os import base64 from io import StringIO Language.build_library('build/my-languages.so', ['tree-sitter-python', 'tree-sitter-go', 'tree-sitter-javascript', 'tree-sitter-java']) def build_parser(language): LANGUAGE = Language('build/my-languages.so', language) parser = Parser() parser.set_language(LANGUAGE) return parser def get_string_from_code(node, lines, code_list): line_start = node.start_point[0] line_end = node.end_point[0] char_start = node.start_point[1] char_end = node.end_point[1] if line_start != line_end: code_list.append(' '.join([lines[line_start][char_start:]] + lines[line_start+1:line_end] + [lines[line_end][:char_end]])) else: code_list.append(lines[line_start][char_start:char_end]) def my_traverse(code, node, code_list): lines = code.split('\n') if node.child_count == 0: get_string_from_code(node, lines, code_list) elif node.type == 'string':
import os
import sys
import ast
import inspect
import shutil
import subprocess

from tree_sitter import Language, Parser

# The compiled Python grammar is cached next to this module.
tree_sitter_python_file = os.path.join(
    os.path.dirname(inspect.getfile(sys.modules[__name__])),
    'tree-sitter-python.so')

if not os.path.isfile(tree_sitter_python_file):
    # Clone the grammar into the current working directory, compile it and
    # remove the checkout again. `check=True` surfaces a failed clone right
    # away instead of failing later inside the build.
    subprocess.run(
        ['git', 'clone', '--depth', '1',
         'https://github.com/tree-sitter/tree-sitter-python'],
        check=True)
    try:
        Language.build_library(tree_sitter_python_file, ['tree-sitter-python'])
    finally:
        # Portable replacement for `rm -rf`; also cleans up after a failed build.
        shutil.rmtree('tree-sitter-python', ignore_errors=True)

PY_LANGUAGE = Language(tree_sitter_python_file, 'python')
parser = Parser()
parser.set_language(PY_LANGUAGE)

# Maps an identifier to the single Operator instance created for it.
operator_registry = {}


def get_or_create_operator(identifier):
    """Return the registered operator for ``identifier``, creating it if needed."""
    if identifier in operator_registry:
        return operator_registry[identifier]
    # Operator.__init__ adds the new instance to the registry.
    return Operator(identifier)


class Operator:
    def __init__(self, identifier):
        # Register the instance so that later lookups reuse it.
        operator_registry[identifier] = self