コード例 #1
0
 def test_set_language(self):
     # One parser is pointed at the Python grammar and then at the JavaScript
     # grammar; each parse must yield that grammar's S-expression.
     ts_parser = Parser()

     ts_parser.set_language(PYTHON)
     parsed = ts_parser.parse(b"def foo():\n  bar()")
     python_expected = trim("""(module (function_definition
             name: (identifier)
             parameters: (parameters)
             body: (block (expression_statement (call
                 function: (identifier)
                 arguments: (argument_list))))))""")
     self.assertEqual(parsed.root_node.sexp(), python_expected)

     ts_parser.set_language(JAVASCRIPT)
     parsed = ts_parser.parse(b"function foo() {\n  bar();\n}")
     javascript_expected = trim("""(program (function_declaration
             name: (identifier)
             parameters: (formal_parameters)
             body: (statement_block
                 (expression_statement
                      (call_expression
                         function: (identifier)
                         arguments: (arguments))))))""")
     self.assertEqual(parsed.root_node.sexp(), javascript_expected)
コード例 #2
0
    def test_edit(self):
        # Edit a parsed tree, check which nodes are flagged as changed, then
        # re-parse the new source while passing the edited tree along.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        old_tree = ts_parser.parse(b"def foo():\n  bar()")

        # Two bytes are inserted right after the opening parenthesis.
        insert_at = len(b"def foo(")
        insert_end = insert_at + 2
        old_tree.edit(
            start_byte=insert_at,
            old_end_byte=insert_at,
            new_end_byte=insert_end,
            start_point=(0, insert_at),
            old_end_point=(0, insert_at),
            new_end_point=(0, insert_end),
        )

        func = old_tree.root_node.children[0]
        self.assertEqual(func.type, 'function_definition')
        self.assertTrue(func.has_changes)
        # Every child except the parameter list (index 2) is untouched.
        for untouched in (0, 1, 3):
            self.assertFalse(func.children[untouched].has_changes)

        params = func.children[2]
        self.assertEqual(params.type, 'parameters')
        self.assertTrue(params.has_changes)
        self.assertEqual(params.start_point, (0, insert_at - 1))
        self.assertEqual(params.end_point, (0, insert_at + 3))

        reparsed = ts_parser.parse(b"def foo(ab):\n  bar()", old_tree)
        expected_sexp = (
            "(module (function_definition "
            "(identifier) "
            "(parameters (identifier)) "
            "(expression_statement (call (identifier) (argument_list)))))"
        )
        self.assertEqual(reparsed.root_node.sexp(), expected_sexp)
コード例 #3
0
    def test_node_text(self):
        # Checks the ``text`` attribute of a tree and of its nodes: after a
        # plain parse, after an edit, and with ``keep_text`` set explicitly.
        source = b"[0, [1, 2, 3]]"
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        tree = ts_parser.parse(source)

        self.assertEqual(tree.text, source)

        module = tree.root_node
        self.assertEqual(module.text, b'[0, [1, 2, 3]]')

        statement = module.children[0]
        self.assertEqual(statement.text, b'[0, [1, 2, 3]]')

        outer_list = statement.children[0]
        self.assertEqual(outer_list.text, b'[0, [1, 2, 3]]')

        # Children of the outer list, in order: "[", 0, ",", inner list, "]".
        expected_child_texts = (b'[', b'0', b',', b'[1, 2, 3]', b']')
        for position, expected_text in enumerate(expected_child_texts):
            self.assertEqual(outer_list.children[position].text, expected_text)

        insert_at = len(b"[0, [")
        insert_end = insert_at + 2
        tree.edit(
            start_byte=insert_at,
            old_end_byte=insert_at,
            new_end_byte=insert_end,
            start_point=(0, insert_at),
            old_end_point=(0, insert_at),
            new_end_point=(0, insert_end),
        )
        # After the edit neither the tree nor its root exposes text.
        self.assertEqual(tree.text, None)
        self.assertEqual(tree.root_node.text, None)

        without_text = ts_parser.parse(b"[0, [1, 2, 3]]", keep_text=False)
        self.assertIsNone(without_text.text)
        self.assertIsNone(without_text.root_node.text)

        with_text = ts_parser.parse(b"[0, [1, 2, 3]]", keep_text=True)
        self.assertEqual(with_text.text, b"[0, [1, 2, 3]]")
        self.assertEqual(with_text.root_node.text, b"[0, [1, 2, 3]]")
コード例 #4
0
ファイル: syntax_match.py プロジェクト: modit-team/MODIT
def corpus_syntax_match(references, candidates, lang):
    """Return the fraction of reference sub-trees that also occur in the candidate.

    Each candidate is compared against every one of its references. Both
    sources are stripped of comments (best effort), parsed, and every sub-tree
    rooted at the root or at a node that has children is rendered as an
    S-expression. A reference sub-tree counts as matched when the same
    S-expression appears among the candidate's sub-trees.

    Args:
        references: per-candidate collections of reference source strings;
            ``references[i]`` belongs to ``candidates[i]``.
        candidates: candidate source strings.
        lang: grammar name loaded from ``parser/my-languages.so``.

    Returns:
        ``match_count / total_count`` over all reference sub-trees, or ``0.0``
        when there are no reference sub-trees to compare (instead of raising
        ``ZeroDivisionError``).
    """
    JAVA_LANGUAGE = Language('parser/my-languages.so', lang)
    parser = Parser()
    parser.set_language(JAVA_LANGUAGE)

    def strip_comments(code):
        # Best effort: if stripping fails the unmodified source is scored.
        # NOTE(review): the language passed here is always 'java', whatever
        # ``lang`` is -- confirm that this is intended.
        try:
            return remove_comments_and_docstrings(code, 'java')
        except Exception:
            return code

    def get_all_sub_trees(root_node):
        # Iterative walk; only nodes that have children are expanded, so
        # leaves never show up as sub-trees of their own.
        sub_tree_sexp_list = []
        node_stack = [(root_node, 1)]
        while node_stack:
            cur_node, cur_depth = node_stack.pop()
            sub_tree_sexp_list.append([cur_node.sexp(), cur_depth])
            for child_node in cur_node.children:
                if len(child_node.children) != 0:
                    node_stack.append((child_node, cur_depth + 1))
        return sub_tree_sexp_list

    match_count = 0
    total_count = 0

    for i, candidate in enumerate(candidates):
        references_sample = references[i]
        for reference in references_sample:
            candidate = strip_comments(candidate)
            reference = strip_comments(reference)

            candidate_tree = parser.parse(bytes(candidate, 'utf8')).root_node
            reference_tree = parser.parse(bytes(reference, 'utf8')).root_node

            # A set gives constant-time membership tests for the loop below.
            cand_sexps = {x[0] for x in get_all_sub_trees(candidate_tree)}
            ref_sexps = get_all_sub_trees(reference_tree)

            for sub_tree, _depth in ref_sexps:
                if sub_tree in cand_sexps:
                    match_count += 1
            total_count += len(ref_sexps)

    if total_count == 0:
        return 0.0
    return match_count / total_count
コード例 #5
0
 def test_set_language(self):
     # The same parser is used with the Python grammar and then with the
     # JavaScript grammar; each parse must match that grammar's S-expression.
     ts_parser = Parser()

     ts_parser.set_language(PYTHON)
     parsed = ts_parser.parse(b"def foo():\n  bar()")
     python_expected = "(module (function_definition (identifier) (parameters) (expression_statement (call (identifier) (argument_list)))))"
     self.assertEqual(parsed.root_node.sexp(), python_expected)

     ts_parser.set_language(JAVASCRIPT)
     parsed = ts_parser.parse(b"function foo() {\n  bar();\n}")
     javascript_expected = "(program (function (identifier) (formal_parameters) (statement_block (expression_statement (call_expression (identifier) (arguments))))))"
     self.assertEqual(parsed.root_node.sexp(), javascript_expected)
コード例 #6
0
def parse_program(program: str,
                  lang: str = None,
                  parser: Parser = None) -> nx.DiGraph:
    """Parse ``program`` and return its syntax tree as a directed graph.

    Args:
        program: source text to parse (encoded as UTF-8 for the parser).
        lang: language name handed to ``get_parser`` when no parser is given.
        parser: parser to use; takes precedence over ``lang``.

    Returns:
        A ``networkx.DiGraph`` with one edge per parent/child pair of the
        syntax tree, each end wrapped in a ``TreeSitterNode``. A tree whose
        root has no children produces an empty graph.

    Raises:
        ValueError: if neither ``lang`` nor ``parser`` is supplied.
    """
    if parser is None:
        if lang is None:
            # ValueError is a subclass of Exception, so callers that catch the
            # previously raised generic Exception keep working.
            raise ValueError(
                "either lang should be given or parser should be given")
        parser = get_parser(lang)

    tree = parser.parse(bytes(program, "utf8"))

    g: nx.DiGraph = nx.DiGraph()

    # FIFO queue, so the tree is visited breadth-first.
    queue: Queue = Queue()
    queue.put(tree.root_node)

    while not queue.empty():

        node = queue.get()

        if not hasattr(node, 'children'):
            continue

        for child in node.children:
            g.add_edge(TreeSitterNode(node, program),
                       TreeSitterNode(child, program))
            queue.put(child)

    return g
コード例 #7
0
    def test_captures(self):
        # Runs a two-pattern query over two function definitions and checks
        # position and capture name of each of the first four captures.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        source = b"def foo():\n  bar()\ndef baz():\n  quux()\n"
        tree = ts_parser.parse(source)
        query = PYTHON.query("""
            (function_definition name: (identifier) @func-def)
            (call function: (identifier) @func-call)
            """)

        # The query is executed four times in a row; only the last result is
        # inspected.
        for _ in range(4):
            captures = query.captures(tree.root_node)

        expected = (
            ((0, 4), (0, 7), "func-def"),
            ((1, 2), (1, 5), "func-call"),
            ((2, 4), (2, 7), "func-def"),
            ((3, 2), (3, 6), "func-call"),
        )
        for position, (start, end, capture_name) in enumerate(expected):
            captured = captures[position]
            self.assertEqual(captured[0].start_point, start)
            self.assertEqual(captured[0].end_point, end)
            self.assertEqual(captured[1], capture_name)
コード例 #8
0
ファイル: ts.py プロジェクト: forest520/csn
    def __init__(self,
                 code,
                 language='python',
                 tree_style='AST',
                 path_style='U2D'):
        """Parse ``code`` with the requested grammar and store the traversal.

        ``tree_style`` is one of AST | SPT || HST | HPT and ``path_style`` one
        of L2L | UD | U2D; both are only stored here. The result of
        ``self.traverse`` is kept in ``self.root`` and ``self.terminals``.
        """
        self.tree_style = tree_style
        self.path_style = path_style

        # Shared library holding the compiled grammars. It is expected to have
        # been produced beforehand with Language.build_library from the
        # vendor/tree-sitter-{go,java,javascript,php,python,ruby} checkouts.
        library_path = 'scripts/build/csn.so'

        ts_parser = Parser()
        # ``language`` selects one grammar from the library:
        # ('go', 'java', 'javascript', 'php', 'python', 'ruby')
        grammar = Language(library_path, language)
        ts_parser.set_language(grammar)

        syntax_tree = ts_parser.parse(code.encode())
        lines = code.split('\n')
        self.root, self.terminals = self.traverse(syntax_tree, lines)

        # Debug output is unconditionally switched on.
        self.debug = True
        if self.debug:
            print(f'{language}{"@" * 9}code\n{code}')
            print(f'{language}{"@" * 9}sexp\n{syntax_tree.root_node.sexp()}')
コード例 #9
0
    def test_child_by_field_id(self):
        # Covers field lookup by numeric id and by name, including unknown
        # fields and arguments of the wrong type.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        tree = ts_parser.parse(b"def foo():\n  bar()")
        module = tree.root_node
        func = tree.root_node.children[0]

        # Unknown field names have no id; known ones map to integers.
        self.assertEqual(PYTHON.field_id_for_name("nameasdf"), None)
        name_id = PYTHON.field_id_for_name("name")
        alias_id = PYTHON.field_id_for_name("alias")
        self.assertIsInstance(alias_id, int)
        self.assertIsInstance(name_id, int)

        self.assertEqual(module.child_by_field_id(alias_id), None)
        self.assertEqual(module.child_by_field_id(name_id), None)
        self.assertEqual(func.child_by_field_id(alias_id), None)
        name_child = func.child_by_field_id(name_id)
        self.assertEqual(name_child.type, "identifier")

        # Wrong argument types are rejected.
        with self.assertRaises(TypeError):
            module.child_by_field_id("")
        with self.assertRaises(TypeError):
            module.child_by_field_name(True)
        with self.assertRaises(TypeError):
            module.child_by_field_name(1)

        self.assertEqual(func.child_by_field_name("name").type, "identifier")
        self.assertEqual(func.child_by_field_name("asdfasdfname"), None)

        # Two lookups of the same field compare equal.
        first_lookup = func.child_by_field_name("name")
        second_lookup = func.child_by_field_name("name")
        self.assertEqual(first_lookup, second_lookup)
コード例 #10
0
    def test_read_callback(self):
        # The source is supplied through a callback, one line remainder at a
        # time, instead of as a single bytes object.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        lines = ["def foo():\n", "  bar()"]

        def read_callback(byte_offset, point):
            row, column = point
            # ``or`` short-circuits, so lines[row] is only read for valid rows.
            exhausted = row >= len(lines) or column >= len(lines[row])
            if exhausted:
                return None
            return lines[row][column:].encode("utf8")

        tree = ts_parser.parse(read_callback)
        expected = trim(
            """(module (function_definition
                name: (identifier)
                parameters: (parameters)
                body: (block (expression_statement (call
                    function: (identifier)
                    arguments: (argument_list))))))"""
        )
        self.assertEqual(tree.root_node.sexp(), expected)
コード例 #11
0
    def run(self, tmp_dir, params):
        """Parse every file in ``params['files']`` and walk its syntax tree.

        Args:
            tmp_dir: not used by this method.
            params: mapping whose ``'files'`` entry lists the paths to check.

        Returns:
            ``(log, ret, ru_utime, ru_stime, ru_maxrss)``. ``ret`` is 1 when
            parser setup failed, a file could not be opened or parsed, or
            ``self.walk`` returned a truthy value for a file; otherwise 0.
        """
        self.ret = 0
        self.log = ''

        # Stays None unless the whole setup succeeds, so a half-configured
        # parser is never used below.
        parser = None
        try:
            lib = self.find_lib()

            lang = Language(lib, 'verilog')

            configured = Parser()
            configured.set_language(lang)
            parser = configured
        except Exception as e:
            self.log += f'{e}\n'
            self.ret = 1

        # Without a parser no file can be checked. The setup error is already
        # in the log, so the files are skipped rather than each reporting an
        # unrelated NameError for the unbound parser.
        sources = params['files'] if parser is not None else ()

        for src in sources:
            try:
                f = open(src, 'rb')
            except IOError:
                self.ret = 1
                self.log_error(src, '', '', 'failed to open file')
                continue

            # ``with`` closes the handle even when parsing or walking fails.
            with f:
                try:
                    tree = parser.parse(f.read())
                    if self.walk(tree.root_node, src):
                        self.ret = 1
                except Exception as e:
                    self.log_error(src, '', '', 'unknown error: ' + str(e))
                    self.ret = 1

        usage = resource.getrusage(resource.RUSAGE_SELF)
        profiling_data = (usage.ru_utime, usage.ru_stime, usage.ru_maxrss)

        return (self.log, self.ret) + profiling_data
コード例 #12
0
def file_parse(path,name):
    """Parse the Python file at ``path`` and write ``result_dot/<name>.dot``.

    The Python grammar is (re)built into ``../build/my-languages.so`` on every
    call. The DOT file receives a ``digraph G{`` header, whatever ``traverse``
    and ``write_together`` emit, and the closing brace.

    Args:
        path: location of the source file; converted with ``str``.
        name: base name of the DOT file; converted with ``str``.

    Returns:
        None.
    """
    global import_lists

    Language.build_library('../build/my-languages.so', ['../tree-sitter-python'])
    PY_LANGUAGE = Language('../build/my-languages.so', 'python')
    parser = Parser()
    parser.set_language(PY_LANGUAGE)
    code = read_file(str(path))
    encoded_code = bytes(code, "utf8")
    tree = parser.parse(encoded_code)
    root_node = tree.root_node

    Graph = nx.DiGraph()
    # ``with`` closes the DOT file even if traverse/write_together raises.
    with open('result_dot/'+str(name)+'.dot','w') as f:
        f.write('digraph G{\n')
        f.write('rankdir="LR";\n')
        traverse(root_node,Graph,encoded_code,f)
        write_together(f,import_lists)
        f.write("}")

    #write_in_dot(Graph)
    return None
コード例 #13
0
ファイル: tree_sitter.py プロジェクト: calon/vim
class TreeSitter(object):
    """Parses buffer contents with one grammar and lists the tree's node types."""

    def __init__(self, language_type, encoding='utf-8'):
        """Create the parser for ``language_type`` and parse an empty buffer."""
        self.language_type = language_type
        self.encoding = encoding
        self.parser = Parser()
        grammar = Language(LIB_BIN, self.language_type)
        self.parser.set_language(grammar)
        self.UpdateBuffer([""])
        self._res = []

    def DFS(self, node, tokenModifiers: list):
        """Append one record per descendant of ``node`` to ``self._res``.

        Descendants are recorded before the child they belong to. Each record
        carries the child's type and the types of its ancestors below ``node``.
        """
        for child in node.children:
            if child.children:
                # The child's own type is added only for its descendants; the
                # caller's list is copied so that it is never mutated.
                nested = copy.copy(tokenModifiers)
                nested.append(child.type)
                self.DFS(child, nested)
            record = {
                'node': child.type,
                'tokenModifiers': tokenModifiers
            }
            self._res.append(record)

    def GetSematicToken(self):
        """Rebuild and return the record list for the current tree."""
        self._res = []
        root = self.tree.root_node
        self.DFS(root, [])
        return self._res

    def UpdateBuffer(self, content_list):
        """Re-parse the buffer given as a list of lines."""
        text = "\n".join(content_list)
        self.tree = self.parser.parse(text.encode(self.encoding))
コード例 #14
0
def parse_program(program: str, parser: Parser = None) -> nx.DiGraph:
    """Turn the syntax tree of ``program`` into a directed graph.

    When no ``parser`` is passed, one is obtained from ``get_parser()``. Every
    parent/child pair of the tree becomes one edge between two
    ``TreeSitterNode`` wrappers.
    """
    if parser is None:
        parser = get_parser()

    syntax_tree = parser.parse(bytes(program, "utf8"))
    graph: nx.DiGraph = nx.DiGraph()

    # FIFO queue of nodes whose children still have to be linked.
    pending: Queue = Queue()
    pending.put(syntax_tree.root_node)

    while not pending.empty():
        parent = pending.get()
        if hasattr(parent, 'children'):
            for child in parent.children:
                source_end = TreeSitterNode(parent, program)
                target_end = TreeSitterNode(child, program)
                graph.add_edge(source_end, target_end)
                pending.put(child)

    return graph
コード例 #15
0
    def test_field_name_for_child(self):
        # Child 0 of the JSX element has no field name; child 1 is "name".
        ts_parser = Parser()
        ts_parser.set_language(JAVASCRIPT)
        tree = ts_parser.parse(b"<div a={1} b={2} />")
        statement = tree.root_node.children[0]
        jsx_element = statement.children[0]

        expected_field_names = (None, "name")
        for child_index, field_name in enumerate(expected_field_names):
            self.assertEqual(
                jsx_element.field_name_for_child(child_index), field_name)
コード例 #16
0
    def test_children_by_field_name(self):
        # Both attributes of the JSX element are returned for "attribute".
        ts_parser = Parser()
        ts_parser.set_language(JAVASCRIPT)
        tree = ts_parser.parse(b"<div a={1} b={2} />")
        statement = tree.root_node.children[0]
        jsx_element = statement.children[0]

        attribute_types = []
        for attribute in jsx_element.children_by_field_name("attribute"):
            attribute_types.append(attribute.type)
        self.assertEqual(attribute_types, ["jsx_attribute", "jsx_attribute"])
コード例 #17
0
class Code_Parser():
    """Converts source code to a flat node sequence and replays such sequences."""

    def __init__(self, grammar, language="python", parser_library_path='src/tree-sitter/tree-sitter-python', **kwargs):
        """Build the grammar library, create the parser and the node builder.

        Args:
            grammar: passed to ``NodeBuilder`` and stored as ``self.grammar``.
            language: grammar name to load from the built library.
            parser_library_path: grammar checkout compiled into the library.
            **kwargs: accepted and ignored.
        """
        Language.build_library('/build/my-languages.so',[parser_library_path])

        LANGUAGE = Language('/build/my-languages.so', language)

        self.grammar = grammar

        self.TS_parser = Parser()
        self.TS_parser.set_language(LANGUAGE)
        self.node_builder = NodeBuilder(self.grammar)

    def code_to_sequence(self, code_str):
        """Parse ``code_str`` and return the node sequence of its syntax tree."""
        tree = self.TS_parser.parse(bytes(code_str, "utf8"))
        root_node = tree.root_node
        sequence = self.TSTree_to_sequence(root_node, code_str)
        return sequence

    def TSTree_to_sequence(self, TSNode, code_str):
        """Flatten ``TSNode`` and its descendants into a list of strings.

        Each node contributes its type, then its content, then ``"<REDUCE>"``.
        A ``string`` node is expanded into start/content/end parts, with the
        first and last character of its text dropped from the content. A
        childless node contributes its text unless the text equals its type.
        """
        node_sequence = [TSNode.type]
        if TSNode.type == "string":
            node_text = sub_str_from_coords(code_str, TSNode.start_point, TSNode.end_point)[1:-1]
            node_sequence += ["_string_start",'"',"<REDUCE>"]
            node_sequence += ["_string_content",node_text,"<REDUCE>"]
            node_sequence += ["_string_end",'"',"<REDUCE>"]
        elif TSNode.children == []:
            node_text = sub_str_from_coords(code_str, TSNode.start_point, TSNode.end_point)
            if TSNode.type != node_text:
                node_sequence.append(node_text)
        else:
            for child in TSNode.children:
                node_sequence += self.TSTree_to_sequence(child, code_str)
        node_sequence.append("<REDUCE>")
        return node_sequence

    def is_valid_sequence(self, sequence):
        """Return True if ``sequence`` starts at ``module`` and replays cleanly.

        An empty sequence is reported as invalid instead of raising
        ``IndexError``.
        """
        if len(sequence) == 0:
            return False
        first_node = sequence[0]
        if first_node != "module":
            return False
        partial_tree = PartialTree(first_node, self.node_builder)
        try:
            for expansion in sequence[1:]:
                partial_tree.add_action(expansion)
        except Exception:
            # Any failure while replaying an action means the sequence is
            # not valid.
            return False
        return True

    def sequence_to_partial_tree(self, sequence):
        """Replay ``sequence`` into a ``PartialTree`` and return it.

        If an action fails, the traceback is printed and the tree built up to
        that point is returned.
        """
        first_node = sequence[0]
        partial_tree = PartialTree(first_node, self.node_builder)
        try:
            for expansion in sequence[1:]:
                partial_tree.add_action(expansion)
        except Exception:
            print("ERROR!")
            traceback.print_exc()
            print("-------")
        return partial_tree
コード例 #18
0
    def test_edit(self, input_type):
        # Same scenario as a plain edit test, but the source is wrapped in
        # ``input_type`` before it is given to the parser.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        old_tree = ts_parser.parse(input_type(b"def foo():\n  bar()"))

        # Two bytes are inserted right after the opening parenthesis.
        insert_at = len(b"def foo(")
        insert_end = insert_at + 2
        old_tree.edit(
            start_byte=insert_at,
            old_end_byte=insert_at,
            new_end_byte=insert_end,
            start_point=(0, insert_at),
            old_end_point=(0, insert_at),
            new_end_point=(0, insert_end),
        )

        func = old_tree.root_node.children[0]
        self.assertEqual(func.type, "function_definition")
        self.assertTrue(func.has_changes)
        # Every child except the parameter list (index 2) is untouched.
        for untouched in (0, 1, 3):
            self.assertFalse(func.children[untouched].has_changes)

        params = func.children[2]
        self.assertEqual(params.type, "parameters")
        self.assertTrue(params.has_changes)
        self.assertEqual(params.start_point, (0, insert_at - 1))
        self.assertEqual(params.end_point, (0, insert_at + 3))

        reparsed = ts_parser.parse(input_type(b"def foo(ab):\n  bar()"), old_tree)
        expected = trim("""(module (function_definition
                name: (identifier)
                parameters: (parameters (identifier))
                body: (block
                    (expression_statement (call
                        function: (identifier)
                        arguments: (argument_list))))))""")
        self.assertEqual(reparsed.root_node.sexp(), expected)
コード例 #19
0
    def test_get_changed_ranges(self):
        # After inserting two bytes and re-parsing, exactly one changed range
        # covering the insertion is reported.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        old_tree = ts_parser.parse(b"def foo():\n  bar()")

        insert_at = len(b"def foo(")
        insert_end = insert_at + 2
        old_tree.edit(
            start_byte=insert_at,
            old_end_byte=insert_at,
            new_end_byte=insert_end,
            start_point=(0, insert_at),
            old_end_point=(0, insert_at),
            new_end_point=(0, insert_end),
        )

        new_tree = ts_parser.parse(b"def foo(ab):\n  bar()", old_tree)
        changed = old_tree.get_changed_ranges(new_tree)

        self.assertEqual(len(changed), 1)
        only_range = changed[0]
        self.assertEqual(only_range.start_byte, insert_at)
        self.assertEqual(only_range.start_point, (0, insert_at))
        self.assertEqual(only_range.end_byte, insert_at + 2)
        self.assertEqual(only_range.end_point, (0, insert_at + 2))
コード例 #20
0
ファイル: main.py プロジェクト: Krosent/gumtree-java-xml-gen
def main(file):
    """Parse a Java file with Tree-sitter and print the resulting XML document.

    The Java grammar is compiled into ``build/my-languages.so`` next to this
    module, the file is parsed, and the converted tree is attached to the
    module-level ``doc`` under a ``<root>`` element before being printed.
    """
    here = os.path.dirname(__file__)

    # Tree-sitter setup: compile the vendored Java grammar into the library in
    # the `build` directory, then load it from there.
    library_path = os.path.join(here, 'build/my-languages.so')
    grammar_dirs = [os.path.join(here, 'vendor/tree-sitter-java')]
    Language.build_library(library_path, grammar_dirs)
    java_lang = Language(os.path.join(here, 'build/my-languages.so'), 'java')

    # Parsing starts here.
    ts_parser = Parser()
    ts_parser.set_language(java_lang)
    syntax_tree = ts_parser.parse(read_file(file))

    gumtree_ast = to_gumtree_node(syntax_tree.root_node)

    # Everything lives inside a single <root> element. The reference test case
    # also has an empty <context> element, so one is created here as well.
    root_element = doc.createElement('root')
    context_element = doc.createElement('context')

    doc.appendChild(root_element)
    root_element.appendChild(context_element)

    # The parsed code structure follows the <context> element.
    root_element.appendChild(gumtree_ast)

    # Recursively add child nodes, if there are any.
    process_node(syntax_tree.root_node, gumtree_ast)

    print(doc.toprettyxml())
コード例 #21
0
ファイル: java.py プロジェクト: WM-SEMERU/ds4se
def add_lcom5(df, col):
    """Add a ``class_lcom5`` column computed from each row's source file.

    Rows are read positionally, so the result lines up with the frame's rows
    whatever its index labels are.

    Args:
        df: frame with ``name``, ``contents`` and ``encoding`` columns. It is
            modified in place.
        col: not used by this function.

    Returns:
        The same ``df`` with the new ``class_lcom5`` column.
    """
    lang_builds = create_parser_builds()
    parser = Parser()
    class_lcom5 = []

    rows = zip(df["name"], df["contents"], df["encoding"])
    for name, contents, encoding in rows:
        # The file extension selects the grammar for this row.
        ext = name.split('.')[-1]
        parser.set_language(lang_builds[ext])
        # Encode once; the same bytes go to the parser and to the metric.
        source = bytes(contents, encoding)
        tree = parser.parse(source)
        class_lcom5.append(calculate_lcom5(tree, ext, source, name))
    df["class_lcom5"] = class_lcom5

    return df
コード例 #22
0
    def test_multibyte_characters(self):
        # Byte offsets of a node must slice the multi-byte source correctly.
        ts_parser = Parser()
        ts_parser.set_language(JAVASCRIPT)
        source_code = bytes("'😎' && '🐍'", "utf8")
        tree = ts_parser.parse(source_code)

        statement = tree.root_node.children[0]
        binary = statement.children[0]
        snake = binary.children[2]

        self.assertEqual(binary.type, "binary_expression")
        self.assertEqual(snake.type, "string")

        snake_bytes = source_code[snake.start_byte:snake.end_byte]
        self.assertEqual(snake_bytes.decode('utf8'), "'🐍'")
コード例 #23
0
ファイル: subtree.py プロジェクト: fivejjs/infercode
def main(opt):
    """Parse ``opt.filename[0]`` and print the sub-tree reports.

    If ``node_types_<language>.csv`` exists, its lines (lower-cased) are
    handed to ``print_subtree`` as the selected node types; otherwise the
    selection is empty.

    Args:
        opt: options object; ``opt.language[0]`` names the grammar and
            ``opt.filename[0]`` the file to parse.
    """
    parser = Parser()
    lang = Languages.get(opt.language[0])
    parser.set_language(lang)

    lang_node_types_filename = "node_types_{}.csv".format(opt.language[0])
    selected_node_types = {}
    if exists(lang_node_types_filename):
        # ``with`` closes the handle instead of leaving it to the collector.
        with open(lang_node_types_filename, "r") as node_types_file:
            lang_node_types = node_types_file.read().splitlines()
        for lang_node_type in lang_node_types:
            selected_node_types[lang_node_type.lower()] = 1

    with open(opt.filename[0], "rb") as source_file:
        data = source_file.read()

    tree = parser.parse(data)
    reports = {}
    # print_subtree fills ``reports``; its return value is not needed here.
    print_subtree(data, tree.root_node, reports, selected_node_types)
    for report in reports.values():
        print(report)
コード例 #24
0
    def test_point_range_captures(self):
        # Runs the query restricted to the point range (1, 0)-(2, 0) and
        # inspects the capture at index 1.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        source = b"def foo():\n  bar()\ndef baz():\n  quux()\n"
        tree = ts_parser.parse(source)
        query = PYTHON.query(
            """
            (function_definition name: (identifier) @func-def)
            (call function: (identifier) @func-call)
            """
        )

        range_start = (1, 0)
        range_end = (2, 0)
        captures = query.captures(
            tree.root_node, start_point=range_start, end_point=range_end)
        # FIXME: this test is incorrect
        captured_node, capture_name = captures[1][0], captures[1][1]
        self.assertEqual(captured_node.start_point, (1, 2))
        self.assertEqual(captured_node.end_point, (1, 5))
        self.assertEqual(capture_name, "func-call")
コード例 #25
0
ファイル: main.py プロジェクト: JetBrains-Research/astminer
class TreeSitterLauncher:
    """Parses files with one Tree-sitter grammar and returns them as dicts."""

    # Parser configured with the grammar chosen in __init__.
    _parser: Parser

    def __init__(self, language, library_path):
        """Load ``language`` from the library at ``library_path``."""
        grammar = Language(library_path, language)
        self._parser = Parser()
        self._parser.set_language(grammar)

    def _get_code_bytes(self, filepath: str) -> bytes:
        """Read ``filepath`` as text and return its content encoded as UTF-8."""
        # ``with`` closes the handle; it used to be left open.
        with open(filepath, "r") as file:
            return bytes(file.read(), "utf-8")

    def parse_file(self, filepath: str) -> TreeAsDict:
        """Parse ``filepath`` and convert the tree through ``TreeBuilder``."""
        code_bytes = self._get_code_bytes(filepath)
        tree_sitter_tree = self._parser.parse(code_bytes)
        cursor = tree_sitter_tree.walk()
        return TreeBuilder(cursor, code_bytes).get_tree_as_dict()
コード例 #26
0
    def test_walk(self):
        # Steps a tree cursor from the module down to the first children of
        # the function definition, checking the node at every position.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        tree = ts_parser.parse(b"def foo():\n  bar()")
        cursor = tree.walk()

        def check_whole_source_node(expected_type):
            # Module and function definition both span the entire source and
            # carry no field name.
            self.assertEqual(cursor.node.type, expected_type)
            self.assertEqual(cursor.node.start_byte, 0)
            self.assertEqual(cursor.node.end_byte, 18)
            self.assertEqual(cursor.node.start_point, (0, 0))
            self.assertEqual(cursor.node.end_point, (1, 7))
            self.assertEqual(cursor.current_field_name(), None)

        def check_named_node(expected_type, expected_field):
            self.assertEqual(cursor.node.type, expected_type)
            self.assertEqual(cursor.node.is_named, True)
            self.assertEqual(cursor.current_field_name(), expected_field)

        # Node always returns the same instance
        self.assertIs(cursor.node, cursor.node)

        check_whole_source_node("module")

        self.assertTrue(cursor.goto_first_child())
        check_whole_source_node("function_definition")

        self.assertTrue(cursor.goto_first_child())
        self.assertEqual(cursor.node.type, "def")
        self.assertEqual(cursor.node.is_named, False)
        self.assertEqual(cursor.node.sexp(), '("def")')
        self.assertEqual(cursor.current_field_name(), None)
        keyword_node = cursor.node

        # Node remains cached after a failure to move
        self.assertFalse(cursor.goto_first_child())
        self.assertIs(cursor.node, keyword_node)

        self.assertTrue(cursor.goto_next_sibling())
        check_named_node("identifier", "name")
        self.assertFalse(cursor.goto_first_child())

        self.assertTrue(cursor.goto_next_sibling())
        check_named_node("parameters", "parameters")
コード例 #27
0
ファイル: __init__.py プロジェクト: sebastien/cells
class TSParser:
    """Convenience wrapper around a Tree-sitter parser for one language."""

    def __init__(self, lang: str):
        """Create the parser and load grammar ``lang`` from ``LIBRARY_PATH``."""
        self.lang = lang
        self.parser = TSBaseParser()
        self.tsLang = Language(LIBRARY_PATH, lang)
        self.parser.set_language(self.tsLang)

    def parse(self, code: str) -> Node:
        """Return the root node of the tree parsed from ``code``."""
        tree = self(code)
        return tree.root_node

    def sexp(self, code: str) -> str:
        """Return the S-expression of the tree parsed from ``code``."""
        root = self.parse(code)
        return root.sexp()

    def query(self, query: str, code: str) -> dict[str, str]:
        """Run ``query`` on ``code``; map each capture name to its extract.

        When a capture name occurs more than once, the last capture wins.
        """
        compiled = self.tsLang.query(query)
        root = self.parse(code)
        return {
            name: extract(node, code)
            for node, name in compiled.captures(root)
        }

    def __call__(self, value: str) -> Tree:
        """Parse ``value`` (encoded as UTF-8) and return the tree."""
        encoded = bytes(value, "utf8")
        return self.parser.parse(encoded)
コード例 #28
0
    def test_children(self):
        # Checks positions of the module and the function definition, then
        # type and named-ness of each child of the function definition.
        ts_parser = Parser()
        ts_parser.set_language(PYTHON)
        tree = ts_parser.parse(b"def foo():\n  bar()")

        module = tree.root_node
        self.assertEqual(module.type, "module")
        self.assertEqual(module.start_byte, 0)
        self.assertEqual(module.end_byte, 18)
        self.assertEqual(module.start_point, (0, 0))
        self.assertEqual(module.end_point, (1, 7))

        # List object is reused
        self.assertIs(module.children, module.children)

        func = module.children[0]
        self.assertEqual(func.type, "function_definition")
        self.assertEqual(func.start_byte, 0)
        self.assertEqual(func.end_byte, 18)
        self.assertEqual(func.start_point, (0, 0))
        self.assertEqual(func.end_point, (1, 7))

        # (type, is_named) of the function definition's children, in order.
        expected_children = (
            ("def", False),
            ("identifier", True),
            ("parameters", True),
            (":", False),
            ("block", True),
        )
        for position, (node_type, named) in enumerate(expected_children):
            child = func.children[position]
            self.assertEqual(child.type, node_type)
            self.assertEqual(child.is_named, named)
            if position == 1:
                # The identifier has no children of its own.
                self.assertEqual(len(child.children), 0)
コード例 #29
0
    def test_multibyte_characters_via_read_callback(self):
        # The source is handed to the parser one byte per callback call, so
        # multi-byte characters are split across calls.
        ts_parser = Parser()
        ts_parser.set_language(JAVASCRIPT)
        source_code = bytes("'😎' && '🐍'", "utf8")

        def read(byte_position, point):
            next_position = byte_position + 1
            return source_code[byte_position:next_position]

        tree = ts_parser.parse(read)
        statement = tree.root_node.children[0]
        binary = statement.children[0]
        snake = binary.children[2]

        self.assertEqual(binary.type, "binary_expression")
        self.assertEqual(snake.type, "string")

        snake_bytes = source_code[snake.start_byte:snake.end_byte]
        self.assertEqual(snake_bytes.decode("utf8"), "'🐍'")
コード例 #30
0
def parse_program(program: str,
                  parser: Parser = None,
                  code2vec: Word2VecKeyedVectors = None) -> nx.DiGraph:
    """Build a directed graph from the syntax tree of ``program``.

    When no ``parser`` is passed, one is obtained from ``get_parser()``. If
    ``code2vec`` is given, every graph node receives a ``data`` attribute: the
    embedding of its lower-cased name, or a shared zero vector when the name
    is not in ``code2vec``.
    """
    if parser is None:
        parser = get_parser()

    syntax_tree = parser.parse(bytes(program, "utf8"))

    # Start from an empty directed graph.
    graph: nx.DiGraph = nx.DiGraph()

    pending: Queue = Queue()
    pending.put(syntax_tree.root_node)

    # The FIFO queue makes this a breadth-first construction of the graph.
    while not pending.empty():
        parent = pending.get()
        if hasattr(parent, 'children'):
            # Connect the parent to each child in turn (parent -> child edge).
            for child in parent.children:
                source_end = TreeSitterNode(parent, program)
                target_end = TreeSitterNode(child, program)
                graph.add_edge(source_end, target_end)
                pending.put(child)

    # Initialise the graph's nodes with their code2vec embeddings.
    if code2vec is not None:
        zeros = np.zeros(code2vec.vector_size)
        for graph_node in graph.nodes:
            key = graph_node.name.lower()
            vector = code2vec.get_vector(key) if key in code2vec else zeros
            graph.add_node(graph_node, data=vector)

    return graph