def test_set_language(self):
    """Verify that one parser can be re-targeted between grammars.

    A single ``Parser`` is first given the PYTHON language and then the
    JAVASCRIPT language. After each switch a short snippet is parsed and
    the S-expression of the root node is compared with the expected text
    (passed through the ``trim`` helper).
    """
    ts_parser = Parser()

    # First grammar: Python.
    ts_parser.set_language(PYTHON)
    python_tree = ts_parser.parse(b"def foo():\n bar()")
    python_expected = (
        "(module (function_definition name: (identifier) "
        "parameters: (parameters) body: (block (expression_statement "
        "(call function: (identifier) arguments: (argument_list))))))"
    )
    self.assertEqual(python_tree.root_node.sexp(), trim(python_expected))

    # Second grammar: JavaScript, on the very same parser object.
    ts_parser.set_language(JAVASCRIPT)
    javascript_tree = ts_parser.parse(b"function foo() {\n bar();\n}")
    javascript_expected = (
        "(program (function_declaration name: (identifier) "
        "parameters: (formal_parameters) body: (statement_block "
        "(expression_statement (call_expression function: (identifier) "
        "arguments: (arguments))))))"
    )
    self.assertEqual(javascript_tree.root_node.sexp(), trim(javascript_expected))
def test_edit(self):
    """Edit a parsed tree, check change flags, then re-parse with the old tree.

    Two bytes are inserted inside the parameter list of ``foo``. The test
    expects only the function definition and its ``parameters`` child to
    report ``has_changes``; the edited tree is then passed to
    ``Parser.parse`` together with the updated source.
    """
    ts_parser = Parser()
    ts_parser.set_language(PYTHON)
    old_tree = ts_parser.parse(b"def foo():\n bar()")

    # The insertion point sits right after the opening parenthesis.
    insert_at = len(b"def foo(")
    old_tree.edit(
        start_byte=insert_at,
        old_end_byte=insert_at,
        new_end_byte=insert_at + 2,
        start_point=(0, insert_at),
        old_end_point=(0, insert_at),
        new_end_point=(0, insert_at + 2),
    )

    function_node = old_tree.root_node.children[0]
    self.assertEqual(function_node.type, 'function_definition')
    self.assertTrue(function_node.has_changes)
    # Children 0, 1 and 3 are expected to be untouched by the edit.
    for untouched_index in (0, 1, 3):
        self.assertFalse(function_node.children[untouched_index].has_changes)

    parameters_node = function_node.children[2]
    self.assertEqual(parameters_node.type, 'parameters')
    self.assertTrue(parameters_node.has_changes)
    self.assertEqual(parameters_node.start_point, (0, insert_at - 1))
    self.assertEqual(parameters_node.end_point, (0, insert_at + 3))

    reparsed_tree = ts_parser.parse(b"def foo(ab):\n bar()", old_tree)
    expected_sexp = (
        "(module (function_definition "
        "(identifier) "
        "(parameters (identifier)) "
        "(expression_statement (call (identifier) (argument_list)))))"
    )
    self.assertEqual(reparsed_tree.root_node.sexp(), expected_sexp)
def test_node_text(self):
    """Check the ``text`` attribute of trees and nodes.

    Three situations are exercised: text is available right after parsing,
    it is ``None`` once the tree has been edited, and the ``keep_text``
    argument of ``Parser.parse`` decides whether text is retained.
    """
    source = b"[0, [1, 2, 3]]"

    ts_parser = Parser()
    ts_parser.set_language(PYTHON)
    tree = ts_parser.parse(source)
    self.assertEqual(tree.text, source)

    # The root, the expression statement and the outer list all span the
    # whole source.
    root = tree.root_node
    self.assertEqual(root.text, b'[0, [1, 2, 3]]')
    statement = root.children[0]
    self.assertEqual(statement.text, b'[0, [1, 2, 3]]')
    outer_list = statement.children[0]
    self.assertEqual(outer_list.text, b'[0, [1, 2, 3]]')

    # Children of the outer list, in order: "[", "0", ",", inner list, "]".
    expected_child_texts = (b'[', b'0', b',', b'[1, 2, 3]', b']')
    for position, expected_text in enumerate(expected_child_texts):
        self.assertEqual(outer_list.children[position].text, expected_text)

    # After an edit the test expects text to be unavailable.
    insert_at = len(b"[0, [")
    tree.edit(
        start_byte=insert_at,
        old_end_byte=insert_at,
        new_end_byte=insert_at + 2,
        start_point=(0, insert_at),
        old_end_point=(0, insert_at),
        new_end_point=(0, insert_at + 2),
    )
    self.assertEqual(tree.text, None)
    self.assertEqual(tree.root_node.text, None)

    # keep_text=False: neither the tree nor its root exposes text.
    without_text = ts_parser.parse(b"[0, [1, 2, 3]]", keep_text=False)
    self.assertIsNone(without_text.text)
    self.assertIsNone(without_text.root_node.text)

    # keep_text=True: both expose the original bytes.
    with_text = ts_parser.parse(b"[0, [1, 2, 3]]", keep_text=True)
    self.assertEqual(with_text.text, b"[0, [1, 2, 3]]")
    self.assertEqual(with_text.root_node.text, b"[0, [1, 2, 3]]")
def _collect_subtree_sexps(root_node):
    """Return ``[sexp, depth]`` pairs for the root and every non-leaf node below it."""
    pending = [(root_node, 1)]
    collected = []
    while pending:
        node, depth = pending.pop()
        collected.append([node.sexp(), depth])
        for child in node.children:
            # Nodes without children are not counted as sub-trees.
            if len(child.children) != 0:
                pending.append((child, depth + 1))
    return collected


def corpus_syntax_match(references, candidates, lang):
    """Return the fraction of reference sub-trees that also occur in the candidate.

    Args:
        references: Sequence where ``references[i]`` is an iterable of
            reference source strings for ``candidates[i]``.
        candidates: Sequence of candidate source strings.
        lang: Language name used to load the grammar from
            ``parser/my-languages.so``.

    Returns:
        ``match_count / total_count`` accumulated over every
        (candidate, reference) pair, or ``0.0`` when no reference sub-tree
        was collected at all (e.g. empty input).
    """
    language = Language('parser/my-languages.so', lang)
    parser = Parser()
    parser.set_language(language)

    match_count = 0
    total_count = 0
    for i, candidate in enumerate(candidates):
        for reference in references[i]:
            # Comment stripping is best-effort: on failure the text is used
            # unchanged. Only ``Exception`` is swallowed so that
            # KeyboardInterrupt / SystemExit still propagate.
            # NOTE(review): 'java' is passed regardless of ``lang`` -- confirm
            # this is intended before changing it.
            try:
                candidate = remove_comments_and_docstrings(candidate, 'java')
            except Exception:
                pass
            try:
                reference = remove_comments_and_docstrings(reference, 'java')
            except Exception:
                pass

            candidate_tree = parser.parse(bytes(candidate, 'utf8')).root_node
            reference_tree = parser.parse(bytes(reference, 'utf8')).root_node

            # A set gives O(1) membership tests; only presence matters here.
            cand_sexps = {sexp for sexp, _ in _collect_subtree_sexps(candidate_tree)}
            ref_sexps = _collect_subtree_sexps(reference_tree)

            match_count += sum(1 for sexp, _ in ref_sexps if sexp in cand_sexps)
            total_count += len(ref_sexps)

    if total_count == 0:
        # Nothing to compare; avoid ZeroDivisionError.
        return 0.0
    return match_count / total_count
def test_set_language(self):
    """Verify that one parser can be switched from PYTHON to JAVASCRIPT.

    After each ``set_language`` call a short snippet is parsed and the
    root node's S-expression is compared with the expected string.
    """
    ts_parser = Parser()

    # Python grammar first.
    ts_parser.set_language(PYTHON)
    python_root = ts_parser.parse(b"def foo():\n bar()").root_node
    python_expected = (
        "(module (function_definition (identifier) (parameters) "
        "(expression_statement (call (identifier) (argument_list)))))"
    )
    self.assertEqual(python_root.sexp(), python_expected)

    # Then JavaScript on the same parser instance.
    ts_parser.set_language(JAVASCRIPT)
    javascript_root = ts_parser.parse(b"function foo() {\n bar();\n}").root_node
    javascript_expected = (
        "(program (function (identifier) (formal_parameters) "
        "(statement_block (expression_statement "
        "(call_expression (identifier) (arguments))))))"
    )
    self.assertEqual(javascript_root.sexp(), javascript_expected)
def parse_program(program: str, lang: str = None, parser: Parser = None) -> nx.DiGraph:
    """Parse ``program`` and return its syntax tree as a directed graph.

    Args:
        program: Source code to parse; it is encoded as UTF-8 before parsing.
        lang: Language name handed to ``get_parser`` when ``parser`` is not
            supplied.
        parser: Ready-to-use parser. When given, ``lang`` is ignored.

    Returns:
        A ``networkx.DiGraph`` with one edge per parent/child pair of the
        parsed tree; both endpoints are wrapped in ``TreeSitterNode``.

    Raises:
        ValueError: If neither ``lang`` nor ``parser`` is provided.
    """
    if parser is None:
        if lang is None:
            # ValueError is a subclass of Exception, so callers that caught
            # the previous generic Exception keep working.
            raise ValueError(
                "either lang should be given or parser should be given")
        parser = get_parser(lang)

    tree = parser.parse(bytes(program, "utf8"))

    g: nx.DiGraph = nx.DiGraph()
    # Breadth-first walk starting at the root node.
    queue: Queue = Queue()
    queue.put(tree.root_node)
    while not queue.empty():
        node = queue.get()
        if not hasattr(node, 'children'):
            continue
        for child in node.children:
            g.add_edge(TreeSitterNode(node, program),
                       TreeSitterNode(child, program))
            queue.put(child)
    return g
def test_captures(self):
    """Check the nodes and capture names returned by ``Query.captures``.

    The query captures function-definition names as ``func-def`` and called
    function names as ``func-call``. The two function bodies are indented
    by two spaces: the expected positions ``(1, 2)-(1, 5)`` for ``bar`` and
    ``(3, 2)-(3, 6)`` for ``quux`` only hold with that indentation.
    """
    parser = Parser()
    parser.set_language(PYTHON)
    source = b"def foo():\n  bar()\ndef baz():\n  quux()\n"
    tree = parser.parse(source)
    query = PYTHON.query(""" (function_definition name: (identifier) @func-def) (call function: (identifier) @func-call) """)

    # NOTE(review): the query is executed four times in a row; presumably
    # this guards against state leaking between runs -- confirm before
    # reducing it to a single call.
    for _ in range(4):
        captures = query.captures(tree.root_node)

    expected = [
        ((0, 4), (0, 7), "func-def"),
        ((1, 2), (1, 5), "func-call"),
        ((2, 4), (2, 7), "func-def"),
        ((3, 2), (3, 6), "func-call"),
    ]
    for index, (start_point, end_point, capture_name) in enumerate(expected):
        self.assertEqual(captures[index][0].start_point, start_point)
        self.assertEqual(captures[index][0].end_point, end_point)
        self.assertEqual(captures[index][1], capture_name)
def __init__(self, code, language='python', tree_style='AST', path_style='U2D', debug=True):
    """Parse ``code`` and build the tree via ``self.traverse``.

    Args:
        code: Source code string to parse.
        language: Grammar name looked up in the pre-built library
            ``scripts/build/csn.so``.
        tree_style: Stored on the instance; documented values are
            ``AST | SPT || HST | HPT``.
        path_style: Stored on the instance; documented values are
            ``L2L | UD | U2D``.
        debug: When true (the default, matching the previous hard-coded
            behaviour) the source and the root S-expression are printed.
    """
    # AST | SPT || HST | HPT
    self.tree_style = tree_style
    # L2L | UD | U2D
    self.path_style = path_style

    # Shared library holding the compiled grammars. It is expected to have
    # been produced beforehand with Language.build_library from the
    # vendor/tree-sitter-{go,java,javascript,php,python,ruby} checkouts.
    csn_so = 'scripts/build/csn.so'

    parser = Parser()
    # Supported names: ('go', 'java', 'javascript', 'php', 'python', 'ruby')
    parser.set_language(Language(csn_so, language))
    tree = parser.parse(code.encode())
    code_lines = code.split('\n')
    self.root, self.terminals = self.traverse(tree, code_lines)

    self.debug = debug
    if self.debug:
        print(f'{language}{"@" * 9}code\n{code}')
        print(f'{language}{"@" * 9}sexp\n{tree.root_node.sexp()}')
def test_child_by_field_id(self):
    """Exercise child lookup by numeric field id and by field name.

    Covers unknown field names, valid ids that a node does not have,
    argument type errors, and equality of nodes returned by repeated
    lookups.
    """
    ts_parser = Parser()
    ts_parser.set_language(PYTHON)
    tree = ts_parser.parse(b"def foo():\n bar()")
    module_node = tree.root_node
    function_node = tree.root_node.children[0]

    # An unknown field name has no id.
    self.assertEqual(PYTHON.field_id_for_name("nameasdf"), None)

    name_id = PYTHON.field_id_for_name("name")
    alias_id = PYTHON.field_id_for_name("alias")
    self.assertIsInstance(alias_id, int)
    self.assertIsInstance(name_id, int)

    # Lookup by id: only the function definition has a "name" child.
    self.assertEqual(module_node.child_by_field_id(alias_id), None)
    self.assertEqual(module_node.child_by_field_id(name_id), None)
    self.assertEqual(function_node.child_by_field_id(alias_id), None)
    name_child = function_node.child_by_field_id(name_id)
    self.assertEqual(name_child.type, "identifier")

    # Wrong argument types are rejected.
    with self.assertRaises(TypeError):
        module_node.child_by_field_id("")
    with self.assertRaises(TypeError):
        module_node.child_by_field_name(True)
    with self.assertRaises(TypeError):
        module_node.child_by_field_name(1)

    # Lookup by name.
    self.assertEqual(
        function_node.child_by_field_name("name").type, "identifier")
    self.assertEqual(function_node.child_by_field_name("asdfasdfname"), None)

    # Two lookups of the same field compare equal.
    first_lookup = function_node.child_by_field_name("name")
    second_lookup = function_node.child_by_field_name("name")
    self.assertEqual(first_lookup, second_lookup)
def test_read_callback(self):
    """Parse source that is supplied through a read callback.

    The callback receives a byte offset and a ``(row, column)`` point and
    returns the rest of the addressed line as UTF-8 bytes, or ``None`` when
    the point lies beyond the available text.
    """
    ts_parser = Parser()
    ts_parser.set_language(PYTHON)
    source_lines = ["def foo():\n", " bar()"]

    def read_callback(byte_offset, point):
        row, column = point
        # Short-circuit keeps the second check from indexing past the end.
        if row >= len(source_lines) or column >= len(source_lines[row]):
            return None
        remainder = source_lines[row][column:]
        return remainder.encode("utf8")

    tree = ts_parser.parse(read_callback)
    expected_sexp = (
        "(module (function_definition name: (identifier) "
        "parameters: (parameters) body: (block (expression_statement "
        "(call function: (identifier) arguments: (argument_list))))))"
    )
    self.assertEqual(tree.root_node.sexp(), trim(expected_sexp))
def run(self, tmp_dir, params):
    """Parse every file in ``params['files']`` as Verilog and walk the tree.

    Args:
        tmp_dir: Not used by this method.
        params: Mapping whose ``'files'`` entry lists the source paths.

    Returns:
        ``(log, ret, ru_utime, ru_stime, ru_maxrss)``: the accumulated log
        text, ``0`` on success or ``1`` if anything failed, followed by
        resource usage figures of the current process.
    """
    self.ret = 0
    self.log = ''

    try:
        lib = self.find_lib()
        lang = Language(lib, 'verilog')
        parser = Parser()
        parser.set_language(lang)
    except Exception as e:
        # NOTE(review): when setup fails ``parser`` stays undefined, so each
        # file below is reported as an "unknown error" (NameError) unless it
        # already failed to open. Left as-is to keep the log output
        # unchanged; consider returning early here.
        self.log += f'{e}\n'
        self.ret = 1

    for src in params['files']:
        try:
            f = open(src, 'rb')
        except IOError:
            self.ret = 1
            self.log_error(src, '', '', 'failed to open file')
            continue

        try:
            # The context manager closes the handle even if reading or
            # parsing raises (previously the file was never closed).
            with f:
                tree = parser.parse(f.read())
            if self.walk(tree.root_node, src):
                self.ret = 1
        except Exception as e:
            self.log_error(src, '', '', 'unknown error: ' + str(e))
            self.ret = 1

    usage = resource.getrusage(resource.RUSAGE_SELF)
    profiling_data = (usage.ru_utime, usage.ru_stime, usage.ru_maxrss)
    return (self.log, self.ret) + profiling_data
def file_parse(path, name):
    """Parse the Python file at ``path`` and write ``result_dot/<name>.dot``.

    The grammar library is (re)built from ``../tree-sitter-python``, the
    file is parsed, and the output is produced by ``traverse`` followed by
    ``write_together`` on the module-level ``import_lists``.

    Args:
        path: Location of the source file; converted with ``str`` before
            being read via ``read_file``.
        name: Base name of the generated ``.dot`` file.

    Returns:
        None.
    """
    global import_lists

    Language.build_library('../build/my-languages.so', ['../tree-sitter-python'])
    PY_LANGUAGE = Language('../build/my-languages.so', 'python')
    parser = Parser()
    parser.set_language(PY_LANGUAGE)

    code = read_file(str(path))
    encoded_code = bytes(code, "utf8")
    tree = parser.parse(encoded_code)
    root_node = tree.root_node
    Graph = nx.DiGraph()

    # ``with`` closes the output file even when traverse/write_together
    # raises part-way through.
    with open('result_dot/' + str(name) + '.dot', 'w') as f:
        f.write('digraph G{\n')
        f.write('rankdir="LR";\n')
        traverse(root_node, Graph, encoded_code, f)
        write_together(f, import_lists)
        f.write("}")
    return None
class TreeSitter(object):
    """Wrap a parser for one language and list the nodes of the current buffer."""

    def __init__(self, language_type, encoding='utf-8'):
        """Create the parser for ``language_type`` and parse an empty buffer."""
        self.language_type = language_type
        self.encoding = encoding
        self.parser = Parser()
        grammar = Language(LIB_BIN, self.language_type)
        self.parser.set_language(grammar)
        self.UpdateBuffer([""])
        self._res = []

    def DFS(self, node, tokenModifiers: list):
        """Append one record per descendant of ``node`` to ``self._res``.

        Each record holds the descendant's type and the modifier list that
        was in effect for its parent level. Descendants that have children
        of their own pass a copy of that list, extended with their type,
        down to the next level.
        """
        for child in node.children:
            if len(child.children) == 0:
                modifiers_below = tokenModifiers
            else:
                modifiers_below = copy.copy(tokenModifiers)
                modifiers_below.append(child.type)
            self.DFS(child, modifiers_below)
            record = {
                'node': child.type,
                'tokenModifiers': tokenModifiers
            }
            self._res.append(record)

    def GetSematicToken(self):
        """Recompute and return the record list for the whole tree."""
        self._res = []
        self.DFS(self.tree.root_node, [])
        return self._res

    def UpdateBuffer(self, content_list):
        """Join ``content_list`` with newlines, encode it and re-parse."""
        joined = "\n".join(content_list)
        self.tree = self.parser.parse(bytes(joined, self.encoding))
def parse_program(program: str, parser: Parser = None) -> nx.DiGraph:
    """Parse ``program`` and return its syntax tree as a directed graph.

    When ``parser`` is omitted one is obtained from ``get_parser()``. The
    tree is walked breadth-first and every parent/child pair becomes an
    edge whose endpoints are wrapped in ``TreeSitterNode``.
    """
    if parser is None:
        parser = get_parser()

    encoded_program = bytes(program, "utf8")
    syntax_tree = parser.parse(encoded_program)

    graph: nx.DiGraph = nx.DiGraph()
    pending: Queue = Queue()
    pending.put(syntax_tree.root_node)
    while not pending.empty():
        parent = pending.get()
        # Objects without a ``children`` attribute contribute no edges.
        if hasattr(parent, 'children'):
            for child in parent.children:
                graph.add_edge(TreeSitterNode(parent, program),
                               TreeSitterNode(child, program))
                pending.put(child)
    return graph
def test_field_name_for_child(self):
    """Check ``field_name_for_child`` on a self-closing JSX element.

    The test expects child 0 to have no field name and child 1 to be the
    ``name`` field.
    """
    ts_parser = Parser()
    ts_parser.set_language(JAVASCRIPT)
    tree = ts_parser.parse(b"<div a={1} b={2} />")

    # Descend two levels from the root to reach the JSX node.
    statement = tree.root_node.children[0]
    jsx_element = statement.children[0]

    self.assertEqual(jsx_element.field_name_for_child(0), None)
    self.assertEqual(jsx_element.field_name_for_child(1), "name")
def test_children_by_field_name(self):
    """Check that ``children_by_field_name`` returns both JSX attributes."""
    ts_parser = Parser()
    ts_parser.set_language(JAVASCRIPT)
    tree = ts_parser.parse(b"<div a={1} b={2} />")

    # Descend two levels from the root to reach the JSX node.
    statement = tree.root_node.children[0]
    jsx_element = statement.children[0]

    attribute_nodes = jsx_element.children_by_field_name("attribute")
    attribute_types = [node.type for node in attribute_nodes]
    self.assertEqual(attribute_types, ["jsx_attribute", "jsx_attribute"])
class Code_Parser():
    """Convert source code into a flat expansion sequence and back.

    A tree-sitter parser produces the syntax tree; ``NodeBuilder`` and
    ``PartialTree`` (defined elsewhere) rebuild a tree from the sequence.
    """

    def __init__(self, grammar, language="python", parser_library_path='src/tree-sitter/tree-sitter-python', **kwargs):
        """Build the grammar library and set up the parser.

        Args:
            grammar: Passed to ``NodeBuilder`` and kept as ``self.grammar``.
            language: Grammar name to load from the built library.
            parser_library_path: Grammar checkout compiled into
                ``/build/my-languages.so``.
            **kwargs: Accepted but not used here.
        """
        Language.build_library('/build/my-languages.so', [parser_library_path])
        LANGUAGE = Language('/build/my-languages.so', language)
        self.grammar = grammar
        self.TS_parser = Parser()
        self.TS_parser.set_language(LANGUAGE)
        self.node_builder = NodeBuilder(self.grammar)

    def code_to_sequence(self, code_str):
        """Parse ``code_str`` and return the sequence for its root node."""
        tree = self.TS_parser.parse(bytes(code_str, "utf8"))
        return self.TSTree_to_sequence(tree.root_node, code_str)

    def TSTree_to_sequence(self, TSNode, code_str):
        """Return the sequence for ``TSNode``: its type, its content, ``"<REDUCE>"``.

        ``string`` nodes are emitted as start/content/end triples with the
        first and last character of the node text stripped. Leaf nodes add
        their text unless it equals the node type. Other nodes add the
        sequences of their children.
        """
        node_sequence = [TSNode.type]
        if TSNode.type == "string":
            node_text = sub_str_from_coords(code_str, TSNode.start_point, TSNode.end_point)[1:-1]
            node_sequence += ["_string_start", '"', "<REDUCE>"]
            node_sequence += ["_string_content", node_text, "<REDUCE>"]
            node_sequence += ["_string_end", '"', "<REDUCE>"]
        elif not TSNode.children:
            node_text = sub_str_from_coords(code_str, TSNode.start_point, TSNode.end_point)
            if TSNode.type != node_text:
                node_sequence.append(node_text)
        else:
            for child in TSNode.children:
                node_sequence.extend(self.TSTree_to_sequence(child, code_str))
        node_sequence.append("<REDUCE>")
        return node_sequence

    def is_valid_sequence(self, sequence):
        """Return True if ``sequence`` starts with ``"module"`` and replays cleanly.

        An empty sequence is reported as invalid instead of raising
        ``IndexError``.
        """
        if not sequence:
            return False
        first_node = sequence[0]
        if first_node != "module":
            return False
        partial_tree = PartialTree(first_node, self.node_builder)
        try:
            for expansion in sequence[1:]:
                partial_tree.add_action(expansion)
        except Exception:
            # Any failure while replaying means the sequence is not valid.
            return False
        return True

    def sequence_to_partial_tree(self, sequence):
        """Replay ``sequence`` into a ``PartialTree`` and return it.

        If an action fails, the traceback is printed and the tree built so
        far is returned.
        """
        first_node = sequence[0]
        partial_tree = PartialTree(first_node, self.node_builder)
        try:
            for expansion in sequence[1:]:
                partial_tree.add_action(expansion)
        except Exception:
            print("ERROR!")
            traceback.print_exc()
            print("-------")
        return partial_tree
def test_edit(self, input_type):
    """Edit a tree and re-parse it, with the source wrapped by ``input_type``.

    ``input_type`` is applied to the byte strings before they are handed to
    ``Parser.parse``. Two bytes are inserted in the parameter list; the
    test then checks the ``has_changes`` flags, the adjusted position of
    the ``parameters`` node and the S-expression after re-parsing.
    """
    ts_parser = Parser()
    ts_parser.set_language(PYTHON)
    original_source = input_type(b"def foo():\n bar()")
    old_tree = ts_parser.parse(original_source)

    # Insert right after the opening parenthesis of the parameter list.
    insert_at = len(b"def foo(")
    edit_args = dict(
        start_byte=insert_at,
        old_end_byte=insert_at,
        new_end_byte=insert_at + 2,
        start_point=(0, insert_at),
        old_end_point=(0, insert_at),
        new_end_point=(0, insert_at + 2),
    )
    old_tree.edit(**edit_args)

    function_node = old_tree.root_node.children[0]
    self.assertEqual(function_node.type, "function_definition")
    self.assertTrue(function_node.has_changes)
    # Children 0, 1 and 3 are expected to be unaffected by the edit.
    for untouched_index in (0, 1, 3):
        self.assertFalse(function_node.children[untouched_index].has_changes)

    parameters_node = function_node.children[2]
    self.assertEqual(parameters_node.type, "parameters")
    self.assertTrue(parameters_node.has_changes)
    self.assertEqual(parameters_node.start_point, (0, insert_at - 1))
    self.assertEqual(parameters_node.end_point, (0, insert_at + 3))

    updated_source = input_type(b"def foo(ab):\n bar()")
    new_tree = ts_parser.parse(updated_source, old_tree)
    expected_sexp = (
        "(module (function_definition name: (identifier) "
        "parameters: (parameters (identifier)) body: (block "
        "(expression_statement (call function: (identifier) "
        "arguments: (argument_list))))))"
    )
    self.assertEqual(new_tree.root_node.sexp(), trim(expected_sexp))
def test_get_changed_ranges(self):
    """Check the range reported between an edited tree and its re-parse.

    Two bytes are inserted in the parameter list; exactly one changed
    range covering those two bytes is expected.
    """
    ts_parser = Parser()
    ts_parser.set_language(PYTHON)
    old_tree = ts_parser.parse(b"def foo():\n bar()")

    insert_at = len(b"def foo(")
    insert_end = insert_at + 2
    old_tree.edit(
        start_byte=insert_at,
        old_end_byte=insert_at,
        new_end_byte=insert_end,
        start_point=(0, insert_at),
        old_end_point=(0, insert_at),
        new_end_point=(0, insert_end),
    )
    new_tree = ts_parser.parse(b"def foo(ab):\n bar()", old_tree)

    changed_ranges = old_tree.get_changed_ranges(new_tree)
    self.assertEqual(len(changed_ranges), 1)

    only_range = changed_ranges[0]
    self.assertEqual(only_range.start_byte, insert_at)
    self.assertEqual(only_range.start_point, (0, insert_at))
    self.assertEqual(only_range.end_byte, insert_end)
    self.assertEqual(only_range.end_point, (0, insert_end))
def main(file):
    """Parse a Java file and print it as an XML document.

    The Java grammar is compiled into ``build/my-languages.so`` next to
    this module, the file content returned by ``read_file`` is parsed, and
    the tree is converted with ``to_gumtree_node`` / ``process_node`` into
    the module-level ``doc``, which is pretty-printed to stdout.
    """
    here = os.path.dirname(__file__)
    library_path = os.path.join(here, 'build/my-languages.so')
    java_grammar_dir = os.path.join(here, 'vendor/tree-sitter-java')

    # Configure tree-sitter: the library is stored in the `build` directory
    # and only the Java grammar is included (the go and python grammars are
    # left out).
    Language.build_library(library_path, [java_grammar_dir])
    java_lang = Language(os.path.join(here, 'build/my-languages.so'), 'java')

    # Parsing starts here.
    ts_parser = Parser()
    ts_parser.set_language(java_lang)
    tree_sitter_tree = ts_parser.parse(read_file(file))
    gumtree_ast = to_gumtree_node(tree_sitter_tree.root_node)

    # Everything lives inside the <root> tag.
    root_element = doc.createElement('root')
    # The test cases carry an empty <context> tag, so one is emitted too.
    context_element = doc.createElement('context')

    doc.appendChild(root_element)
    root_element.appendChild(context_element)
    # The parsed code structure follows the context tag.
    root_element.appendChild(gumtree_ast)

    # Recursively add child nodes, if any.
    process_node(tree_sitter_tree.root_node, gumtree_ast)

    print(doc.toprettyxml())
def add_lcom5(df, col):
    """Add a ``class_lcom5`` column computed from each row's source.

    For every row the file extension of ``name`` selects the grammar from
    ``create_parser_builds()``, ``contents`` is encoded with the row's
    ``encoding`` and parsed, and ``calculate_lcom5`` produces the value.

    Args:
        df: Table with ``name``, ``contents`` and ``encoding`` columns.
        col: Not used by this function.

    Returns:
        The same ``df`` object with the new ``class_lcom5`` column.
    """
    lang_builds = create_parser_builds()
    parser = Parser()

    class_lcom5 = []
    # Rows are walked positionally, so this also works when the index is
    # not 0..n-1 (label lookups such as df["name"][i] would not).
    for name, contents, encoding in zip(df["name"], df["contents"], df["encoding"]):
        ext = name.split('.')[-1]
        parser.set_language(lang_builds[ext])
        # Encode once and reuse the bytes for parsing and for the metric.
        source_bytes = bytes(contents, encoding)
        tree = parser.parse(source_bytes)
        class_lcom5.append(calculate_lcom5(tree, ext, source_bytes, name))

    df["class_lcom5"] = class_lcom5
    return df
def test_multibyte_characters(self):
    """Check that node byte offsets are right when the source has emoji.

    The third child of the binary expression is sliced out of the encoded
    source by its byte offsets and must decode to the snake string literal.
    """
    ts_parser = Parser()
    ts_parser.set_language(JAVASCRIPT)
    encoded_source = bytes("'😎' && '🐍'", "utf8")
    tree = ts_parser.parse(encoded_source)

    # root -> statement -> binary expression -> right-hand operand
    statement = tree.root_node.children[0]
    binary_expression = statement.children[0]
    right_operand = binary_expression.children[2]

    self.assertEqual(binary_expression.type, "binary_expression")
    self.assertEqual(right_operand.type, "string")

    operand_bytes = encoded_source[right_operand.start_byte:right_operand.end_byte]
    self.assertEqual(operand_bytes.decode('utf8'), "'🐍'")
def main(opt):
    """Parse ``opt.filename[0]`` and print the reports from ``print_subtree``.

    If ``node_types_<language>.csv`` exists, each of its lines (lower-cased)
    becomes a key of the selected-node-types mapping handed to
    ``print_subtree``.

    Args:
        opt: Options object; ``opt.language[0]`` names the grammar and
            ``opt.filename[0]`` the file to parse.
    """
    parser = Parser()
    lang = Languages.get(opt.language[0])
    parser.set_language(lang)

    lang_node_types_filename = "node_types_{}.csv".format(opt.language[0])
    selected_node_types = {}
    if exists(lang_node_types_filename):
        # Context managers make sure both files are closed after reading.
        with open(lang_node_types_filename, "r") as node_types_file:
            lang_node_types = node_types_file.read().splitlines()
        for lang_node_type in lang_node_types:
            selected_node_types[lang_node_type.lower()] = 1

    with open(opt.filename[0], "rb") as source_file:
        data = source_file.read()
    tree = parser.parse(data)

    reports = {}
    # print_subtree fills ``reports``; its return value is not needed here.
    print_subtree(data, tree.root_node, reports, selected_node_types)
    for report in reports.values():
        print(report)
def test_point_range_captures(self):
    """Run a query restricted to the point range ``(1, 0)``-``(2, 0)``.

    The function bodies are indented by two spaces: the asserted position
    ``(1, 2)-(1, 5)`` for ``bar`` only holds with that indentation.
    """
    parser = Parser()
    parser.set_language(PYTHON)
    source = b"def foo():\n  bar()\ndef baz():\n  quux()\n"
    tree = parser.parse(source)
    query = PYTHON.query(
        """ (function_definition name: (identifier) @func-def) (call function: (identifier) @func-call) """
    )
    captures = query.captures(tree.root_node, start_point=(1, 0), end_point=(2, 0))

    # FIXME: this test is incorrect
    # NOTE(review): the FIXME gives no detail; the index used below
    # (captures[1]) should be confirmed against the intended result.
    node, capture_name = captures[1][0], captures[1][1]
    self.assertEqual(node.start_point, (1, 2))
    self.assertEqual(node.end_point, (1, 5))
    self.assertEqual(capture_name, "func-call")
class TreeSitterLauncher:
    """Parse files with one tree-sitter grammar and return them as dicts."""

    # Parser configured with the grammar chosen in __init__.
    _parser: Parser

    def __init__(self, language, library_path):
        """Load ``language`` from the library at ``library_path``."""
        grammar = Language(library_path, language)
        self._parser = Parser()
        self._parser.set_language(grammar)

    def _get_code_bytes(self, filepath: str) -> bytes:
        """Read ``filepath`` as text and return it encoded as UTF-8."""
        # ``with`` closes the handle; it used to be left open.
        with open(filepath, "r") as file:
            return bytes(file.read(), "utf-8")

    def parse_file(self, filepath: str) -> TreeAsDict:
        """Parse ``filepath`` and return ``TreeBuilder(...).get_tree_as_dict()``."""
        code_bytes = self._get_code_bytes(filepath)
        tree_sitter_tree = self._parser.parse(code_bytes)
        cursor = tree_sitter_tree.walk()
        return TreeBuilder(cursor, code_bytes).get_tree_as_dict()
def test_walk(self):
    """Walk the tree with a cursor and check each visited node.

    The function body is indented by two spaces so that the source is 18
    bytes long and ends at ``(1, 7)``, as the assertions below require.
    """
    parser = Parser()
    parser.set_language(PYTHON)
    tree = parser.parse(b"def foo():\n  bar()")
    cursor = tree.walk()

    # Node always returns the same instance
    self.assertIs(cursor.node, cursor.node)

    self.assertEqual(cursor.node.type, "module")
    self.assertEqual(cursor.node.start_byte, 0)
    self.assertEqual(cursor.node.end_byte, 18)
    self.assertEqual(cursor.node.start_point, (0, 0))
    self.assertEqual(cursor.node.end_point, (1, 7))
    self.assertEqual(cursor.current_field_name(), None)

    self.assertTrue(cursor.goto_first_child())
    self.assertEqual(cursor.node.type, "function_definition")
    self.assertEqual(cursor.node.start_byte, 0)
    self.assertEqual(cursor.node.end_byte, 18)
    self.assertEqual(cursor.node.start_point, (0, 0))
    self.assertEqual(cursor.node.end_point, (1, 7))
    self.assertEqual(cursor.current_field_name(), None)

    self.assertTrue(cursor.goto_first_child())
    self.assertEqual(cursor.node.type, "def")
    self.assertEqual(cursor.node.is_named, False)
    self.assertEqual(cursor.node.sexp(), '("def")')
    self.assertEqual(cursor.current_field_name(), None)
    def_node = cursor.node

    # Node remains cached after a failure to move
    self.assertFalse(cursor.goto_first_child())
    self.assertIs(cursor.node, def_node)

    self.assertTrue(cursor.goto_next_sibling())
    self.assertEqual(cursor.node.type, "identifier")
    self.assertEqual(cursor.node.is_named, True)
    self.assertEqual(cursor.current_field_name(), "name")
    self.assertFalse(cursor.goto_first_child())

    self.assertTrue(cursor.goto_next_sibling())
    self.assertEqual(cursor.node.type, "parameters")
    self.assertEqual(cursor.node.is_named, True)
    self.assertEqual(cursor.current_field_name(), "parameters")
class TSParser:
    """Convenience wrapper around a tree-sitter parser for one language."""

    def __init__(self, lang: str):
        """Load ``lang`` from ``LIBRARY_PATH`` and configure the parser."""
        self.lang = lang
        self.parser = TSBaseParser()
        self.tsLang = Language(LIBRARY_PATH, lang)
        self.parser.set_language(self.tsLang)

    def parse(self, code: str) -> Node:
        """Return the root node of the tree parsed from ``code``."""
        tree = self(code)
        return tree.root_node

    def sexp(self, code: str) -> str:
        """Return the S-expression of the root node parsed from ``code``."""
        root = self.parse(code)
        return root.sexp()

    def query(self, query: str, code: str) -> dict[str, str]:
        """Run ``query`` on ``code`` and map capture names to ``extract`` results.

        When a capture name occurs more than once, the last capture wins.
        """
        compiled_query = self.tsLang.query(query)
        root = self.parse(code)
        captures = compiled_query.captures(root)
        return {name: extract(node, code) for node, name in captures}

    def __call__(self, value: str) -> Tree:
        """Parse ``value`` (encoded as UTF-8) and return the tree."""
        encoded = bytes(value, "utf8")
        return self.parser.parse(encoded)
def test_children(self):
    """Check types, positions and ``is_named`` of the function's children.

    The function body is indented by two spaces so that the source is 18
    bytes long and ends at ``(1, 7)``, as the assertions below require.
    """
    parser = Parser()
    parser.set_language(PYTHON)
    tree = parser.parse(b"def foo():\n  bar()")

    root_node = tree.root_node
    self.assertEqual(root_node.type, "module")
    self.assertEqual(root_node.start_byte, 0)
    self.assertEqual(root_node.end_byte, 18)
    self.assertEqual(root_node.start_point, (0, 0))
    self.assertEqual(root_node.end_point, (1, 7))

    # List object is reused
    self.assertIs(root_node.children, root_node.children)

    fn_node = root_node.children[0]
    self.assertEqual(fn_node.type, "function_definition")
    self.assertEqual(fn_node.start_byte, 0)
    self.assertEqual(fn_node.end_byte, 18)
    self.assertEqual(fn_node.start_point, (0, 0))
    self.assertEqual(fn_node.end_point, (1, 7))

    def_node = fn_node.children[0]
    self.assertEqual(def_node.type, "def")
    self.assertEqual(def_node.is_named, False)

    id_node = fn_node.children[1]
    self.assertEqual(id_node.type, "identifier")
    self.assertEqual(id_node.is_named, True)
    self.assertEqual(len(id_node.children), 0)

    params_node = fn_node.children[2]
    self.assertEqual(params_node.type, "parameters")
    self.assertEqual(params_node.is_named, True)

    colon_node = fn_node.children[3]
    self.assertEqual(colon_node.type, ":")
    self.assertEqual(colon_node.is_named, False)

    statement_node = fn_node.children[4]
    self.assertEqual(statement_node.type, "block")
    self.assertEqual(statement_node.is_named, True)
def test_multibyte_characters_via_read_callback(self):
    """Check byte offsets for emoji source supplied one byte at a time.

    The read callback returns a single byte per call, so multi-byte
    characters are split across calls.
    """
    ts_parser = Parser()
    ts_parser.set_language(JAVASCRIPT)
    encoded_source = bytes("'😎' && '🐍'", "utf8")

    def read_single_byte(byte_position, point):
        next_position = byte_position + 1
        return encoded_source[byte_position:next_position]

    tree = ts_parser.parse(read_single_byte)

    # root -> statement -> binary expression -> right-hand operand
    statement = tree.root_node.children[0]
    binary_expression = statement.children[0]
    right_operand = binary_expression.children[2]

    self.assertEqual(binary_expression.type, "binary_expression")
    self.assertEqual(right_operand.type, "string")

    operand_bytes = encoded_source[right_operand.start_byte:right_operand.end_byte]
    self.assertEqual(operand_bytes.decode("utf8"), "'🐍'")
def parse_program(program: str, parser: Parser = None, code2vec: Word2VecKeyedVectors = None) -> nx.DiGraph:
    """Parse ``program`` into a directed graph, optionally with embeddings.

    When ``parser`` is omitted one is obtained from ``get_parser()``. The
    tree is walked breadth-first and every parent/child pair becomes an
    edge whose endpoints are wrapped in ``TreeSitterNode``. If ``code2vec``
    is given, every graph node receives a ``data`` attribute: the vector
    for its lower-cased name, or a zero vector when the name is unknown.
    """
    if parser is None:
        parser = get_parser()
    syntax_tree = parser.parse(bytes(program, "utf8"))

    # Start from an empty directed graph.
    graph: nx.DiGraph = nx.DiGraph()
    pending: Queue = Queue()
    pending.put(syntax_tree.root_node)
    # Build the graph in breadth-first order.
    while not pending.empty():
        parent = pending.get()
        if hasattr(parent, 'children'):
            # Connect the parent to each of its children in turn.
            for child in parent.children:
                graph.add_edge(TreeSitterNode(parent, program),
                               TreeSitterNode(child, program))
                pending.put(child)

    # Initialise the node representations from the code2vec embeddings.
    if code2vec is not None:
        zero_vector = np.zeros(code2vec.vector_size)
        for graph_node in graph.nodes:
            key = graph_node.name.lower()
            embedding = code2vec.get_vector(key) if key in code2vec else zero_vector
            graph.add_node(graph_node, data=embedding)
    return graph