Esempio n. 1
0
def main() -> None:
    """
    Build the shared tree-sitter library for the bundled grammars.

    The C, C#, C++, Java and Python grammar directories are resolved below
    the directory returned by ``get_tree_sitter_dir`` and compiled into the
    file returned by ``get_tree_sitter_so``.

    :return: None.
    """
    tree_sitter_dir = get_tree_sitter_dir()
    # Grammar directories relative to the tree-sitter root; the order is the
    # order in which they are handed to the build.
    relative_grammar_dirs = (
        "vendor/tree-sitter-c",
        "vendor/tree-sitter-c-sharp",
        "vendor/tree-sitter-cpp",
        "vendor/tree-sitter-java",
        "vendor/tree-sitter-python",
    )
    grammar_locs = [
        os.path.join(tree_sitter_dir, relative_dir)
        for relative_dir in relative_grammar_dirs
    ]
    # Output file of the compiled library.
    bin_loc = get_tree_sitter_so()
    Language.build_library(bin_loc, grammar_locs)
    print("Parser successfully initialized.")
Esempio n. 2
0
    def __init__(
            self,
            langs: List[str],
            added_nodes: Dict[str, Dict[str, str]],
            skip_node_types: Dict[str, List[str]],
            vendors_path: Path = Path("./vendor"),
    ):
        """
        Build the grammar library for ``langs`` and create a parser.

        :param langs: language names; the grammar for each one is taken from
            ``vendors_path / "tree-sitter-<lang>"``.
        :param added_nodes: per-language dict with ``"prefix"`` and
            ``"suffix"`` entries; a language missing from it gets empty
            strings for both.
        :param skip_node_types: per-language list of node types; a language
            missing from it gets an empty list.
        :param vendors_path: directory that holds the grammar checkouts.
        """
        super(TreeSitterParser, self).__init__()

        # Keep shallow copies: the per-language defaults filled in below must
        # not leak into the dictionaries owned by the caller.
        self.added_nodes = dict(added_nodes)
        self.skip_node_types = dict(skip_node_types)

        vendors = []
        for lang in langs:
            vendors.append(vendors_path / f"tree-sitter-{lang}")
            self.added_nodes.setdefault(lang, {"prefix": "", "suffix": ""})
            self.skip_node_types.setdefault(lang, [])

        Language.build_library(
            # Store the library in the `build` directory
            "build/my-languages.so",
            # Include one or more languages
            vendors,
        )

        self.parser = Parser()
Esempio n. 3
0
def file_parse(path,name):
    """
    Parse a Python source file and write a DOT graph for it.

    The python grammar is (re)built into ``../build/my-languages.so``, the
    file at ``path`` is parsed, and the output is written to
    ``result_dot/<name>.dot``: a ``digraph G`` header, whatever ``traverse``
    and ``write_together`` emit, and the closing brace.

    :param path: location of the source file; converted with ``str``.
    :param name: base name of the generated ``.dot`` file.
    :return: None.
    """
    # Only read here; declared global as in the original module layout.
    global import_lists

    Language.build_library('../build/my-languages.so', ['../tree-sitter-python'])
    PY_LANGUAGE = Language('../build/my-languages.so', 'python')
    parser = Parser()
    parser.set_language(PY_LANGUAGE)
    code = read_file(str(path))
    encoded_code = bytes(code, "utf8")
    tree = parser.parse(encoded_code)
    root_node = tree.root_node

    Graph = nx.DiGraph()
    # The context manager closes the file even when traversal raises.
    with open('result_dot/'+str(name)+'.dot','w') as f:
        f.write('digraph G{\n')
        f.write('rankdir="LR";\n')
        traverse(root_node,Graph,encoded_code,f)
        write_together(f,import_lists)
        f.write("}")

    #write_in_dot(Graph)
    return None
Esempio n. 4
0
def main() -> None:
    """
    Build the shared tree-sitter library for the C-family and Java grammars.

    Grammar directories and the output file (``build/langs.so``) are all
    resolved below the directory returned by ``get_tree_sitter_dir``.

    :return: None
    """
    tree_sitter_dir = get_tree_sitter_dir()
    # Grammar directory names, in the order they are handed to the build.
    grammar_dir_names = (
        "tree-sitter-c",
        "tree-sitter-c-sharp",
        "tree-sitter-cpp",
        "tree-sitter-java",
    )
    grammar_locs = [
        os.path.join(tree_sitter_dir, dir_name)
        for dir_name in grammar_dir_names
    ]
    # Output file of the compiled library.
    bin_loc = os.path.join(tree_sitter_dir, "build/langs.so")
    Language.build_library(bin_loc, grammar_locs)
Esempio n. 5
0
def build_so(lib_dir, lang):
    """Build the Tree-Sitter shared library for one language.

    Looks for ``<lib_dir>/<lang>.zip`` (with ``~`` expanded). When the
    archive is present it is extracted into ``lib_dir`` and the extracted
    ``tree-sitter-<lang>-master`` directory is compiled into
    ``<lib_dir>/<lang>.so``. When the archive is missing nothing is built.

    Args:
        lib_dir: directory holding the archive; also receives the output.
        lang: language name used in the archive, source and output names.
    """
    _lib_dir = os.path.expanduser(lib_dir)
    # The unexpanded path is only used in log messages.
    lib_file = os.path.join(lib_dir, '{}.zip'.format(lang))
    _lib_file = os.path.join(_lib_dir, '{}.zip'.format(lang))
    if not os.path.exists(_lib_file):
        # The previous message claimed the .so file already existed here,
        # although this branch only means the source archive is missing.
        LOGGER.info(
            'Tree-Sitter archive {} not found, nothing to build.'.format(
                lib_file))
        return

    LOGGER.info(
        'Tree-Sitter archive {} found, extracting.'.format(lib_file))
    # decompress Tree-Sitter library
    with zipfile.ZipFile(_lib_file, 'r') as zip_file:
        zip_file.extractall(path=_lib_dir)
    so_file = os.path.join(lib_dir, '{}.so'.format(lang))
    _so_file = os.path.join(_lib_dir, '{}.so'.format(lang))
    LOGGER.info('Building Tree-Sitter compile file {}'.format(so_file))
    Language.build_library(
        # one *.so file is built per language
        _so_file,
        # grammar sources extracted from the archive
        [os.path.join(_lib_dir, 'tree-sitter-{}-master'.format(lang))],
    )
Esempio n. 6
0
    def __init__(self) -> None:
        """
        Build the grammar library and create one parser per language.

        ``self.parsers`` maps each name in ``TREE_SITTER_LANGS`` to a
        ``Parser`` configured with that language.
        """
        # assume submodules exist
        vendor_dirs = [
            "vendor/tree-sitter-%s" % lang for lang in TREE_SITTER_LANGS
        ]
        Language.build_library(BUILD_PATH, vendor_dirs)

        self.parsers = {}
        for lang in TREE_SITTER_LANGS:
            parser = Parser()
            # Load the grammar that matches the key; the language name used
            # to be hard-coded to "haskell" for every entry.
            parser.set_language(Language(BUILD_PATH, lang))
            self.parsers[lang] = parser
Esempio n. 7
0
 def __init__(self, grammar, language="python", parser_library_path='src/tree-sitter/tree-sitter-python', **kwargs):
     """
     Build the grammar library and set up the tree-sitter parser.

     :param grammar: stored on the instance and passed to ``NodeBuilder``.
     :param language: name of the language to load from the built library.
     :param parser_library_path: grammar directory to compile.
     :param kwargs: accepted but not used here.
     """
     library_file = '/build/my-languages.so'
     Language.build_library(library_file, [parser_library_path])
     ts_language = Language(library_file, language)

     self.grammar = grammar

     ts_parser = Parser()
     ts_parser.set_language(ts_language)
     self.TS_parser = ts_parser
     self.node_builder = NodeBuilder(self.grammar)
Esempio n. 8
0
def main() -> None:
    """
    Run ``download_grammars`` and compile the grammars into the library.

    :return: None.
    """
    download_grammars()
    # Grammar locations are resolved before the output path, in that order.
    grammar_dirs = get_grammar_locs()
    library_path = get_tree_sitter_so()
    Language.build_library(library_path, grammar_dirs)
    logging.info("Parser successfully initialized.")
Esempio n. 9
0
def build_libraries(languages: List[str], path):
    """
    Rebuild the tree-sitter library at ``path`` from scratch.

    :param languages: grammar locations handed to ``Language.build_library``.
    :param path: output file; deleted first when it is an existing file.
    """
    stale_library_present = os.path.isfile(path)
    if stale_library_present:
        # Dropping the old file forces tree-sitter to create a new library.
        os.remove(path)
    Language.build_library(path, languages)
Esempio n. 10
0
    def __init__(self):
        """
        Build the grammar library if it is missing and create the parser.

        ``self.tree`` starts out as ``None``.
        """
        library_file = 'build/my-languages.so'
        if not os.path.exists(library_file):
            # Grammar directories compiled into the shared library.
            grammar_dirs = [
                'vendor/tree-sitter-c',
                'vendor/tree-sitter-cpp',
                'vendor/tree-sitter-c-sharp',
                'vendor/tree-sitter-rust',
                'vendor/tree-sitter-javascript',
                'vendor/tree-sitter-python',
            ]
            Language.build_library(library_file, grammar_dirs)

        self.ts = Parser()
        self.tree = None
Esempio n. 11
0
def create_parser_builds(path=None):
    """
    Compile the Java grammar and return the loaded language objects.

    :param path: accepted but not used by this function.
    :return: dict mapping ``"java"`` to the loaded ``Language``.
    """
    library_file = 'build/my-languages.so'
    # Build from the `tree-sitter-java` directory, then load the result.
    Language.build_library(library_file, ['tree-sitter-java'])
    java_language = Language(library_file, 'java')
    return {"java": java_language}
Esempio n. 12
0
def main(file):
    """
    Parse ``file`` as Java and print the converted tree as XML.

    The Java grammar is built next to this module, the file content is
    parsed, the tree is converted with ``to_gumtree_node`` and
    ``process_node``, attached to the module-level ``doc`` document under a
    ``root`` element, and the pretty-printed XML is printed.

    :param file: passed to ``read_file`` to obtain the content to parse.
    """
    this_directory = os.path.dirname(__file__)
    library_file = os.path.join(this_directory, 'build/my-languages.so')
    java_grammar_dir = os.path.join(this_directory, 'vendor/tree-sitter-java')

    # Configure tree-sitter: compile the grammar, then load it.
    Language.build_library(library_file, [java_grammar_dir])
    java_lang = Language(library_file, 'java')

    # Parsing starts here.
    parser = Parser()
    parser.set_language(java_lang)
    tree_sitter_tree = parser.parse(read_file(file))

    gumtree_ast = to_gumtree_node(tree_sitter_tree.root_node)

    # Everything lives inside the <root> tag.
    root_node = doc.createElement('root')
    # The test cases carry an empty <context> tag, so one is emitted as well.
    context_node = doc.createElement('context')

    # Document -> <root> -> (<context>, converted tree), in that order.
    doc.appendChild(root_node)
    root_node.appendChild(context_node)
    root_node.appendChild(gumtree_ast)

    # Recursively add children nodes (if any exist).
    process_node(tree_sitter_tree.root_node, gumtree_ast)

    print(doc.toprettyxml())
Esempio n. 13
0
def main() -> None:
    """
    Build the shared tree-sitter library for all vendored grammars.

    Every grammar directory lives below ``<tree-sitter dir>/vendor``; the
    TypeScript checkout contributes two grammars (``typescript`` and
    ``tsx``). The result is written to the file returned by
    ``get_tree_sitter_so``.

    :return: None.
    """
    tree_sitter_dir = get_tree_sitter_dir()
    # Path components below "vendor", in the order handed to the build.
    grammar_components = (
        ("tree-sitter-javascript",),
        ("tree-sitter-python",),
        ("tree-sitter-java",),
        ("tree-sitter-go",),
        ("tree-sitter-cpp",),
        ("tree-sitter-ruby",),
        ("tree-sitter-typescript", "typescript"),
        ("tree-sitter-typescript", "tsx"),
        ("tree-sitter-php",),
        ("tree-sitter-c-sharp",),
        ("tree-sitter-c",),
        ("tree-sitter-bash",),
        ("tree-sitter-rust",),
    )
    grammar_locs = [
        os.path.join(tree_sitter_dir, "vendor", *components)
        for components in grammar_components
    ]
    # Output file of the compiled library.
    bin_loc = get_tree_sitter_so()
    Language.build_library(bin_loc, grammar_locs)
    print("Parser successfully initialized.")
Esempio n. 14
0
def install_parsers(languages=None):
    """
    Clone the requested grammars and build the shared parser library.

    :param languages: language names to install; a falsy value selects
        ``supported_languages``.
    :raises ValueError: if a requested language is not in
        ``supported_languages``. This is raised before anything is cloned.
    """
    if not languages:
        languages = supported_languages
    if all(language_installed(lang) for lang in languages):
        print(f"Parsers for languages {languages} already installed.")
        return
    # Validate up front so a bad name cannot leave a half-finished install.
    for lang in languages:
        if lang not in supported_languages:
            raise ValueError(
                f"{lang} not supported. The supported languages are: {', '.join(sorted(supported_languages))}."
            )
    wd = os.getcwd()
    os.chdir(PARSER_DIR)
    try:
        for lang in languages:
            repo = f"tree-sitter-{lang}"
            git_clone(f"https://github.com/tree-sitter/{repo}")
        # NOTE(review): the build covers every supported language, not only
        # the ones cloned above - confirm that all checkouts are expected to
        # be present at this point.
        Language.build_library(tree_sitter_build, [
            str(PARSER_DIR / f"tree-sitter-{lang}")
            for lang in supported_languages
        ])
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(wd)
Esempio n. 15
0
    def build_parser(self):
        """
        Clone the grammar repository if needed, build it and load a parser.

        ``self.LANG_URL`` supplies ``(url, folder)``; the repository is
        cloned into ``<fast_trees package dir>/<folder>`` unless that path
        already exists. The library is built at
        ``<repo dir>/build/my-languages.so`` and loaded for ``self.LANG``.
        Sets ``self.language`` and ``self.parser``.
        """
        url, folder = self.LANG_URL
        repo_dir = Path(fast_trees.__path__[0] + "/" + folder)
        if repo_dir.exists():
            print("Repo already exists, continuing.")
        else:
            print(f"Downloading repo {url} to {repo_dir}.")
            Repo.clone_from(url, repo_dir)

        # `repo_dir` already starts with the package directory, so the
        # library path is derived from it directly. Prepending the package
        # directory a second time produced a doubled, bogus path.
        library_file = str(repo_dir / "build" / "my-languages.so")
        Language.build_library(
            # Store the library in the repository's `build` directory
            library_file,
            # Include one or more languages
            [str(repo_dir)],
        )
        self.language = Language(library_file, self.LANG)
        self.parser = Parser()
        self.parser.set_language(self.language)
Esempio n. 16
0
 def _get_language_library(self):
     """
     Return the path of the compiled language library, building it if needed.

     The build is guarded by ``self.ts_lang_cache_lock``, which is acquired
     with a 300 second timeout.

     :return: path object of ``language.so`` inside the language cache dir.
     :raises filelock.Timeout: if the lock cannot be acquired in time.
     """
     try:
         self.ts_lang_cache_lock.acquire(timeout=300)
     except filelock.Timeout:
         log.error(f"Failed to acquire lock on TSABL {self}")
         log.debug(f"lock object is {self.ts_lang_cache_lock}")
         raise

     # The lock is held from here on, so the release below always matches a
     # successful acquire (it used to run after a timed-out acquire too).
     try:
         lib = self._get_language_cache_dir() / "language.so"
         # Return value is unused; the call is kept because it may have side
         # effects - TODO confirm.
         self._get_language_repo()
         repodir = self._get_language_repo_path()
         if not lib.exists():
             log.warning(
                 f"building library for {self}, this could take a while...")
             start = time.time()
             Language.build_library(str(lib.resolve()), [repodir])
             log.debug(
                 f"library build of {self} completed after {round(time.time() - start)} seconds"
             )
         return lib
     finally:
         self.ts_lang_cache_lock.release()
Esempio n. 17
0
def make_tree_sitter_lib(args, lang_repo_list):
    """
    Build the shared library from the given grammar repositories.

    The output file is ``<lib_path(args)>/<args.lib_name>``; anything that
    already exists at that location is removed first so the library is
    always rebuilt.

    :return: whatever ``Language.build_library`` returns.
    """
    full_lib_creation_path = f"{lib_path(args)}/{args.lib_name}"

    stale = Path(full_lib_creation_path)
    if stale.exists():
        os.remove(full_lib_creation_path)

    build_result = Language.build_library(full_lib_creation_path,
                                          lang_repo_list)
    return build_result
Esempio n. 18
0
from javim.buffer_change import BufferChangeListener, BufferChangeDispatcher
from javim.util_classes import OffsetChain, ReplaceRangeOffsetChainUpdate, DeleteOffsetChainUpdate, DelayedAction
import javim
import treelib as tl
from tempfile import mkstemp
from os import remove
from os.path import exists
from threading import Lock

from tree_sitter import Language, Parser, Tree

# Build the Java grammar into build/langs.so at import time and load it.
# NOTE(review): the grammar path is an absolute path inside one user's home
# directory, so this only works on a machine that has that checkout.
Language.build_library("build/langs.so", ["/home/friese/git/tree-sitter-java"])
JAVA_LANG = Language("build/langs.so", "java")
# Module-wide parser configured for Java.
parser = Parser()
parser.set_language(JAVA_LANG)

# Temporary file path used by tree2file. mkstemp also returns an open
# OS-level file descriptor, which is discarded here.
# NOTE(review): the descriptor is never closed in the visible code - confirm.
_, tree_file = mkstemp(suffix="tree")

# Lock object; its users are outside the visible part of the file.
tree_lock = Lock()


def tree2file(tree: Tree):
    if exists(tree_file): remove(tree_file)

    def gen_index():
        i = 1
        while True:
            yield i
            i += 1

    index = gen_index()
Esempio n. 19
0
# Build the shared grammar library on first import: clone every configured
# grammar repository into a temporary directory, check out the pinned
# version and compile all of them into LIBRARY_DIR.
if not os.path.exists(LIBRARY_DIR):
    logging.warning("TreeSitter has not been compiled. Cloning languages and building...")
    from git import Repo
    # `clone_root` rather than `dir`, which shadowed the builtin at module
    # level.
    with TemporaryDirectory() as clone_root:
        # Clone all repos above at the given tag
        repo_dirs = []
        for lang, (url, suffix) in _LANGUAGE_REPOS.items():
            logging.warning(f"Cloning `{lang}`...")
            repo_dir = os.path.join(clone_root, lang)
            repo = Repo.clone_from(url, repo_dir)
            repo.git.checkout(TREE_SITTER_LANG_VER)
            # `suffix` selects the grammar directory inside the checkout.
            repo_dirs.append(os.path.join(repo_dir, suffix))

        # Build library by pointing to each repo
        logging.warning("Building Tree-sitter Library...")
        Language.build_library(LIBRARY_DIR, repo_dirs)

# Language name -> loaded Language object.
_LANGUAGES = {}
for language in _LANGUAGE_REPOS:
    _LANGUAGES[language] = Language(LIBRARY_DIR, language)

# Add aliases: alternative spellings that map to an already loaded language.
_ALIASES = {
    "c++": "cpp",
    "c#": "c_sharp",
    "csharp": "c_sharp",
    "js": "javascript",
    "ts": "typescript"
}
for alias, target in _ALIASES.items():
    _LANGUAGES[alias] = _LANGUAGES[target]
from tree_sitter import Language

from processing.utils import tree_sitter_so, tree_sitter_languages

# Compile the grammars below into the library file named by tree_sitter_so().
Language.build_library(
    tree_sitter_so(),
    [
        tree_sitter_languages(grammar_name)
        for grammar_name in (
            'tree-sitter-go',
            'tree-sitter-java',
            'tree-sitter-javascript',
            'tree-sitter-php',
            'tree-sitter-python',
            'tree-sitter-ruby',
        )
    ],
)
Esempio n. 21
0
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from tree_sitter import Language, Parser

# Compile the listed grammars into one shared library when this file runs.
Language.build_library(
    # Output file; a bare relative name, so there is no `build` directory
    'my-languages.so',

    # Grammar directories to include (relative paths)
    [
        'tree-sitter-go',
        'tree-sitter-javascript',
        'tree-sitter-python',
        'tree-sitter-php',
        'tree-sitter-java',
        'tree-sitter-ruby',
        'tree-sitter-c-sharp',
    ])
Esempio n. 22
0
from tree_sitter import Language, Parser

# Compile the vendored grammars into one shared library when this file runs.
Language.build_library(
  # Output file, stored in the `build` directory
  'build/csn.so',

  # Grammar directories to include (relative paths below `vendor`)
  [
    'vendor/tree-sitter-go',
    'vendor/tree-sitter-java',
    'vendor/tree-sitter-javascript',
    'vendor/tree-sitter-php',
    'vendor/tree-sitter-python',
    'vendor/tree-sitter-ruby',
  ]
)

Esempio n. 23
0
  return false;
}
"""[1:-1])

# Install the grammar's npm dependencies, then generate the parser sources.
# Both commands run inside the grammar directory and abort on failure.
for _tool_command in (["npm", "install"], ["npx", "tree-sitter", "generate"]):
    subprocess.run(_tool_command, cwd=grammar_path, check=True)
# Following are commented for future reference to expose playground
# Remove "--docker" if local environment matches with the container
# subprocess.run(["npx", "tree-sitter", "build-wasm", "--docker"],
#                cwd=grammar_path, check=True)

# Compile the generated grammar and load it as the WGSL language.
_wgsl_library_file = grammar_path + "/build/wgsl.so"
Language.build_library(_wgsl_library_file, [grammar_path])

WGSL_LANGUAGE = Language(_wgsl_library_file, "wgsl")

parser = Parser()
parser.set_language(WGSL_LANGUAGE)

error_list = []

for key, value in scanner_components[scanner_example.name()].items():
    if "expect-error" in key:
        continue
    value = value[:]
    if "function-scope" in key:
        value = ["fn function__scope____() {"] + value + ["}"]
# pylint: disable=missing-docstring

import re
from unittest import TestCase
from os import path
from tree_sitter import Language, Parser

# Shared library built from the grammar fixtures used by the tests below.
LIB_PATH = path.join("build", "languages.so")
Language.build_library(
    LIB_PATH,
    [
        path.join("tests", "fixtures", fixture_name)
        for fixture_name in ("tree-sitter-python", "tree-sitter-javascript")
    ],
)
# Loaded in the same order as before: Python first, then JavaScript.
PYTHON, JAVASCRIPT = (
    Language(LIB_PATH, language_name)
    for language_name in ("python", "javascript")
)


class TestParser(TestCase):
    def test_set_language(self):
        parser = Parser()
        parser.set_language(PYTHON)
        tree = parser.parse(b"def foo():\n  bar()")
        self.assertEqual(
            tree.root_node.sexp(),
            trim("""(module (function_definition
                name: (identifier)
                parameters: (parameters)
                body: (block (expression_statement (call
                    function: (identifier)
Esempio n. 25
0
# Grammar name -> git URL of the grammar repository to clone and build.
LANGS = {"java" : "https://github.com/tree-sitter/tree-sitter-java.git",
         "c" : "https://github.com/tree-sitter/tree-sitter-c.git"}
# File name of the shared library produced by this script.
LIB = "tslib.so"

##
##
##

from os import chdir
from pathlib import Path
from subprocess import check_output, STDOUT
from tempfile import TemporaryDirectory
from tree_sitter import Language

# Resolve the output path before changing directory: it is relative to this
# file, not to the build directory.
target = Path(__file__).parent.absolute() / "badass/lang" / LIB
build = Path("build")
build.mkdir(exist_ok=True, parents=True)
chdir(build)

# Clone every grammar that is not already present in the build directory.
repos = []
for name, repo in LANGS.items() :
    local = Path(repo).stem
    if not Path(local).exists() :
        print("cloning", repo)
        check_output(["git", "clone", repo, local], stderr=STDOUT)
    repos.append(local)
print("building", target)
# Remove any previous library so that the build below really runs.
if target.exists() :
    target.unlink()
# The build must not live inside an `assert`: under `python -O` asserts are
# stripped and the library would never be built.
if not Language.build_library(str(target), repos) :
    raise RuntimeError("tree-sitter library was not built: {}".format(target))
import glob

from tree_sitter import Language

# Absolute grammar directories compiled into the shared library.
languages = [
    '/src/vendor/tree-sitter-python',
    '/src/vendor/tree-sitter-javascript',
    # The two TypeScript grammars are currently left out of the build:
    # '/src/vendor/tree-sitter-typescript/typescript',
    # '/src/vendor/tree-sitter-typescript/tsx',
    '/src/vendor/tree-sitter-go',
    '/src/vendor/tree-sitter-ruby',
    '/src/vendor/tree-sitter-java',
    '/src/vendor/tree-sitter-cpp',
    '/src/vendor/tree-sitter-php',
]

Language.build_library(
    # Output file of the compiled library (absolute path under /src/build)
    '/src/build/py-tree-sitter-languages.so',
    # Grammar directories to include
    languages)
Esempio n. 27
0
import os

from tree_sitter import Language

from code_search import shared
from code_search.function_parser.utils import get_tree_sitter_languages_lib

# Grammar directories, all located below the shared vendor directory.
languages = [
    os.path.join(shared.VENDOR_DIR, grammar_name)
    for grammar_name in (
        'tree-sitter-python',
        'tree-sitter-javascript',
        'tree-sitter-go',
        'tree-sitter-ruby',
        'tree-sitter-java',
        'tree-sitter-php',
    )
]

# Compile them into the library file named by the project helper.
Language.build_library(get_tree_sitter_languages_lib(), languages)
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <*****@*****.**>
#
# Distributed under terms of the GPLv3 license.
"""

"""

from tree_sitter import Language, Parser
from queue import SimpleQueue

# Compile the C++ grammar when this file runs.
Language.build_library(
    # Output file, stored in the `build` directory
    'build/my-languages.so',

    # Grammar directories to include (relative path)
    [
        'tree-sitter-cpp',
    ])

# Load the C++ language from the library built above and bind a parser to it.
CPP_LANGUAGE = Language('build/my-languages.so', 'cpp')
parser = Parser()
parser.set_language(CPP_LANGUAGE)

tree = parser.parse(
    bytes(
        """
#include <iostream>
#include <cstdlib>

auto main( int argc, char** argv ) -> int
Esempio n. 29
0
import streamlit as st
from transformers import AutoTokenizer, AutoModelWithLMHead, SummarizationPipeline
import torch
from tree_sitter import Language, Parser
import os
import base64
from io import StringIO

# Compile the grammars used by this app into one shared library at import.
Language.build_library(
    'build/my-languages.so',
    [
        'tree-sitter-python',
        'tree-sitter-go',
        'tree-sitter-javascript',
        'tree-sitter-java',
    ],
)

def build_parser(language):
    """Return a new ``Parser`` set to ``language`` from the built library."""
    grammar = Language('build/my-languages.so', language)
    ts_parser = Parser()
    ts_parser.set_language(grammar)
    return ts_parser

def get_string_from_code(node, lines, code_list):
    """Append the source text covered by ``node`` to ``code_list``.

    ``node.start_point`` and ``node.end_point`` are read as
    ``(line, column)`` pairs indexing into ``lines``. A node that spans
    several lines is flattened into one string, with the line pieces joined
    by single spaces.

    :param node: object exposing ``start_point`` and ``end_point``.
    :param lines: the source code split into lines.
    :param code_list: list that receives the extracted text (mutated).
    """
    line_start = node.start_point[0]
    line_end = node.end_point[0]
    char_start = node.start_point[1]
    char_end = node.end_point[1]
    # Indentation uses spaces only: the former tab/space mix is rejected by
    # Python 3 with a TabError.
    if line_start != line_end:
        pieces = (
            [lines[line_start][char_start:]]
            + lines[line_start + 1:line_end]
            + [lines[line_end][:char_end]]
        )
        code_list.append(' '.join(pieces))
    else:
        code_list.append(lines[line_start][char_start:char_end])

def my_traverse(code, node, code_list):
    lines = code.split('\n')
    if node.child_count == 0:
	get_string_from_code(node, lines, code_list)
    elif node.type == 'string':
Esempio n. 30
0
import os
import sys
import ast
import inspect
from tree_sitter import Language, Parser

# The compiled Python grammar lives next to this module.
tree_sitter_python_file = os.path.join(
    os.path.dirname(inspect.getfile(sys.modules[__name__])),
    'tree-sitter-python.so')
if not os.path.isfile(tree_sitter_python_file):
    # First use: fetch the grammar sources into the current directory.
    _clone_status = os.system(
        'git clone --depth 1 https://github.com/tree-sitter/tree-sitter-python'
    )
    # Fail early with a clear message when no sources are available at all.
    # A leftover checkout from an earlier run is still accepted, as before.
    if _clone_status != 0 and not os.path.isdir('tree-sitter-python'):
        raise RuntimeError(
            'git clone of tree-sitter-python failed with status {}'.format(
                _clone_status))
    try:
        Language.build_library(tree_sitter_python_file, ['tree-sitter-python'])
    finally:
        # Remove the checkout even when the build fails.
        os.system('rm -rf tree-sitter-python')
PY_LANGUAGE = Language(tree_sitter_python_file, 'python')
parser = Parser()
parser.set_language(PY_LANGUAGE)

# Identifier -> Operator instance; Operator.__init__ stores itself here.
operator_registry = {}


def get_or_create_operator(identifier):
    """Return the registered operator for ``identifier`` or create a new one."""
    if identifier not in operator_registry:
        return Operator(identifier)
    return operator_registry[identifier]


class Operator:
    def __init__(self, identifier):
        operator_registry[identifier] = self