def checkAndGenerateAST(i, lstCFilesStep1, fopStep2, fopASTInfo,
                        fopStep4GraphAll, fopStep4GraphSimplify, fpLog, nlpObj,
                        offsetContext, isSaveGraph):
    """Generate the AST/graph output for one C++ file and log the outcome.

    The file at ``lstCFilesStep1[i]`` is handed to ``getJsonDict`` together
    with the ``.dot``/``.png`` target paths derived from its base name. When
    the result is usable, its string form is written to the ``*_ast.txt``
    file under ``fopASTInfo``, the source file is copied into ``fopStep2``
    and ``True`` is appended to the log at ``fpLog``; otherwise ``False`` is
    logged.

    Args:
        i: Index of the file to process inside ``lstCFilesStep1``.
        lstCFilesStep1: List of paths to the input ``.cpp`` files.
        fopStep2: Folder prefix the source file is copied to on success.
        fopASTInfo: Folder prefix for the ``*_ast.txt`` output.
        fopStep4GraphAll: Folder prefix for the ``*_all.dot``/``.png`` paths.
        fopStep4GraphSimplify: Folder prefix for the ``*_simplify`` paths.
        fpLog: Path of the tab-separated log file (opened in append mode).
        nlpObj: Passed through to ``getJsonDict`` unchanged.
        offsetContext: Passed through to ``getJsonDict`` unchanged.
        isSaveGraph: Passed through to ``getJsonDict`` unchanged.

    Returns:
        A tuple ``(i, numWordItem, itemTimeProcess)``: the index, the number
        of whitespace-separated tokens in the file and the seconds spent in
        ``getJsonDict``. If processing fails before these are measured, the
        tuple is ``(i, 0, 0.0)``.
    """
    fpMixFileCPP = lstCFilesStep1[i]
    totalFiles = len(lstCFilesStep1)
    nameOfFile = os.path.basename(fpMixFileCPP)
    nameWithoutExtension = nameOfFile.replace('.cpp', '')
    fpCompiledCPP = fopStep2 + nameOfFile
    fpASTItem = fopASTInfo + nameOfFile.replace('.cpp', '_ast.txt')

    # Defaults so the final return cannot hit an unbound local when the
    # try block fails before the measurements are taken.
    numWordItem = 0
    itemTimeProcess = 0.0
    try:
        parser = Parser()
        parser.set_language(CPP_LANGUAGE)
        fpDotGraphAllText = fopStep4GraphAll + nameWithoutExtension + '_all.dot'
        fpDotGraphAllImage = fopStep4GraphAll + nameWithoutExtension + '_all.png'
        fpDotGraphSimplifyText = fopStep4GraphSimplify + nameWithoutExtension + '_simplify.dot'
        fpDotGraphSimplifyImage = fopStep4GraphSimplify + nameWithoutExtension + '_simplify.png'

        with open(fpMixFileCPP, 'r') as fileSource:
            strItem = fileSource.read()

        start_time = time.time()
        jsonObject = getJsonDict(fpMixFileCPP, fpDotGraphAllText,
                                 fpDotGraphAllImage, fpDotGraphSimplifyText,
                                 fpDotGraphSimplifyImage, parser, nlpObj,
                                 offsetContext, isSaveGraph)
        end_time = time.time()
        numWordItem = len(strItem.split())
        itemTimeProcess = end_time - start_time

        # The result is usable only when it is neither 'Error' nor 'None'.
        # The previous `!= 'Error' or != 'None'` test was always true.
        isRunOK = str(jsonObject) not in ('Error', 'None')
        if isRunOK:
            strContentAppend = '\n'.join(
                [nameOfFile, str(jsonObject), '\n\n\n'])
            with open(fpASTItem, 'w') as fileAST:
                fileAST.write(strContentAppend)
            shutil.copyfile(fpMixFileCPP, fpCompiledCPP)

        with open(fpLog, 'a') as fileLog:
            fileLog.write('{}\t{}\n'.format(nameOfFile, isRunOK))
        print('OK {}/{} {}'.format(i, totalFiles, fpMixFileCPP))
    except Exception:
        # Best-effort per file: report the failure and keep the batch going.
        print("Exception in user code:")
        print("-" * 60)
        traceback.print_exc(file=sys.stdout)
        print("-" * 60)
        print('Error: {} {}'.format(i, fpMixFileCPP))
        print('Error {}/{} {}'.format(i, totalFiles, fpMixFileCPP))
    return i, numWordItem, itemTimeProcess
コード例 #2
0
    def test_field_name_for_child(self):
        """Check the field name reported for the first two children of a node."""
        js_parser = Parser()
        js_parser.set_language(JAVASCRIPT)
        tree = js_parser.parse(b"<div a={1} b={2} />")
        # First grandchild of the root node.
        jsx_node = tree.root_node.children[0].children[0]

        expected_by_index = ((0, None), (1, "name"))
        for child_index, expected_field in expected_by_index:
            self.assertEqual(
                jsx_node.field_name_for_child(child_index), expected_field)
コード例 #3
0
    def test_children_by_field_name(self):
        """Check the types of the children stored under the "attribute" field."""
        js_parser = Parser()
        js_parser.set_language(JAVASCRIPT)
        tree = js_parser.parse(b"<div a={1} b={2} />")
        # First grandchild of the root node.
        jsx_node = tree.root_node.children[0].children[0]

        attribute_types = []
        for attribute in jsx_node.children_by_field_name("attribute"):
            attribute_types.append(attribute.type)
        self.assertEqual(attribute_types, ["jsx_attribute", "jsx_attribute"])
コード例 #4
0
class Code_Parser():
    """Turn source code into a flat node sequence and replay such sequences.

    A tree-sitter parser produces the syntax tree; ``TSTree_to_sequence``
    flattens it into a list of node types, leaf texts and ``"<REDUCE>"``
    markers. ``PartialTree`` (built with a ``NodeBuilder`` for the given
    grammar) is used to replay a sequence action by action.
    """

    def __init__(self, grammar, language="python", parser_library_path='src/tree-sitter/tree-sitter-python', **kwargs):
        """Build the tree-sitter library and set up the parser and node builder.

        Args:
            grammar: Grammar object handed to ``NodeBuilder``.
            language: Language name to load from the compiled library.
            parser_library_path: Path to the tree-sitter grammar sources.
            **kwargs: Accepted and ignored.
        """
        library_file = '/build/my-languages.so'
        Language.build_library(library_file, [parser_library_path])
        LANGUAGE = Language(library_file, language)

        self.grammar = grammar

        self.TS_parser = Parser()
        self.TS_parser.set_language(LANGUAGE)
        self.node_builder = NodeBuilder(self.grammar)

    def code_to_sequence(self, code_str):
        """Parse ``code_str`` and return its flattened node sequence."""
        tree = self.TS_parser.parse(bytes(code_str, "utf8"))
        return self.TSTree_to_sequence(tree.root_node, code_str)

    def TSTree_to_sequence(self, TSNode, code_str):
        """Recursively flatten ``TSNode`` into a list ending with "<REDUCE>".

        Args:
            TSNode: Node to flatten; its ``type``, ``children``,
                ``start_point`` and ``end_point`` are read.
            code_str: Source text the node positions refer to.

        Returns:
            The node type, followed by the node's content (string parts,
            leaf text or the flattened children), followed by "<REDUCE>".
        """
        node_sequence = [TSNode.type]
        if TSNode.type == "string":
            # Drop the first and last character (the delimiters) and emit the
            # string as explicit start / content / end parts.
            node_text = sub_str_from_coords(code_str, TSNode.start_point, TSNode.end_point)[1:-1]
            node_sequence += ["_string_start", '"', "<REDUCE>"]
            node_sequence += ["_string_content", node_text, "<REDUCE>"]
            node_sequence += ["_string_end", '"', "<REDUCE>"]
        elif TSNode.children == []:
            node_text = sub_str_from_coords(code_str, TSNode.start_point, TSNode.end_point)
            # Skip the text when it merely repeats the node type.
            if TSNode.type != node_text:
                node_sequence.append(node_text)
        else:
            for child in TSNode.children:
                node_sequence += self.TSTree_to_sequence(child, code_str)
        node_sequence.append("<REDUCE>")
        return node_sequence

    def is_valid_sequence(self, sequence):
        """Return True when ``sequence`` starts at "module" and replays cleanly.

        An empty sequence is reported as invalid instead of raising
        ``IndexError``.
        """
        if not sequence or sequence[0] != "module":
            return False
        partial_tree = PartialTree(sequence[0], self.node_builder)
        try:
            for expansion in sequence[1:]:
                partial_tree.add_action(expansion)
        except Exception:
            # Any failure while replaying an action marks the sequence invalid.
            return False
        return True

    def sequence_to_partial_tree(self, sequence):
        """Replay ``sequence`` into a ``PartialTree`` and return it.

        If an action fails, the traceback is printed and the tree built so
        far is returned.
        """
        first_node = sequence[0]
        partial_tree = PartialTree(first_node, self.node_builder)
        try:
            for expansion in sequence[1:]:
                partial_tree.add_action(expansion)
        except Exception:
            print("ERROR!")
            traceback.print_exc()
            print("-------")
        return partial_tree
コード例 #5
0
ファイル: install_parsers.py プロジェクト: nokia/codesearch
def get_parser(language):
    """Return the parser for ``language``, building and caching it on first use.

    The name is first mapped through ``LANGUAGE_ALIASES``; the resolved name
    is the key used in the ``PARSERS`` cache.
    """
    resolved = LANGUAGE_ALIASES.get(language, language)
    if resolved not in PARSERS:
        grammar = Language(tree_sitter_build, resolved)
        fresh_parser = Parser()
        fresh_parser.set_language(grammar)
        PARSERS[resolved] = fresh_parser
    return PARSERS[resolved]
コード例 #6
0
def get_parser(so_path: str = None) -> Parser:
    """Create a parser for the 'java' language.

    :param so_path: path of the compiled library; ``JAVA_SO_PATH`` is used
        when it is ``None``.
    :return: a new parser with the language set.
    """
    library_path = JAVA_SO_PATH if so_path is None else so_path
    java_grammar = Language(library_path, 'java')

    java_parser = Parser()
    java_parser.set_language(java_grammar)
    return java_parser
コード例 #7
0
ファイル: tree_sitter.py プロジェクト: aksh-at/cf-search
    def __init__(self) -> None:
        """Build the shared library and create one parser per configured language.

        The grammar sources are expected under ``vendor/tree-sitter-<lang>``
        for every entry of ``TREE_SITTER_LANGS``. The resulting parsers are
        stored in ``self.parsers`` keyed by language.
        """
        # assume submodules exist
        vendor_dirs = ["vendor/tree-sitter-%s" % lang for lang in TREE_SITTER_LANGS]
        Language.build_library(BUILD_PATH, vendor_dirs)

        self.parsers = {}
        for lang in TREE_SITTER_LANGS:
            parser = Parser()
            # Load the grammar that matches the key; the previous code loaded
            # "haskell" for every entry.
            # NOTE(review): assumes each entry is also the grammar's symbol
            # name in the built library - confirm for names with dashes.
            parser.set_language(Language(BUILD_PATH, lang))
            self.parsers[lang] = parser
コード例 #8
0
    def test_node_text(self):
        """Check tree/node text after parsing, after an edit, and with keep_text."""
        source = b"[0, [1, 2, 3]]"
        parser = Parser()
        parser.set_language(PYTHON)
        tree = parser.parse(source)

        self.assertEqual(tree.text, source)

        # The root, its first child and that child's first child all span
        # the whole source.
        root_node = tree.root_node
        self.assertEqual(root_node.text, b'[0, [1, 2, 3]]')
        exp_stmt_node = root_node.children[0]
        self.assertEqual(exp_stmt_node.text, b'[0, [1, 2, 3]]')
        list_node = exp_stmt_node.children[0]
        self.assertEqual(list_node.text, b'[0, [1, 2, 3]]')

        # Text of each of the five children of the list node, in order.
        expected_child_texts = (b'[', b'0', b',', b'[1, 2, 3]', b']')
        for position, expected_text in enumerate(expected_child_texts):
            self.assertEqual(list_node.children[position].text, expected_text)

        # After an edit, the text is no longer reported.
        edit_offset = len(b"[0, [")
        edit_end = edit_offset + 2
        tree.edit(
            start_byte=edit_offset,
            old_end_byte=edit_offset,
            new_end_byte=edit_end,
            start_point=(0, edit_offset),
            old_end_point=(0, edit_offset),
            new_end_point=(0, edit_end),
        )
        self.assertEqual(tree.text, None)
        self.assertEqual(tree.root_node.text, None)

        without_text = parser.parse(b"[0, [1, 2, 3]]", keep_text=False)
        self.assertIsNone(without_text.text)
        self.assertIsNone(without_text.root_node.text)

        with_text = parser.parse(b"[0, [1, 2, 3]]", keep_text=True)
        self.assertEqual(with_text.text, b"[0, [1, 2, 3]]")
        self.assertEqual(with_text.root_node.text, b"[0, [1, 2, 3]]")
コード例 #9
0
    def test_tree_cursor_without_tree(self):
        """Use a cursor after the tree it came from is no longer referenced here."""
        py_parser = Parser()
        py_parser.set_language(PYTHON)

        def make_cursor():
            # The tree is referenced only inside this function.
            tree = py_parser.parse(b"def foo():\n  bar()")
            return tree.walk()

        cursor = make_cursor()
        self.assertIs(cursor.node, cursor.node)
        for child in cursor.node.children:
            self.assertIsNotNone(child.is_named)
コード例 #10
0
    def test_text_predicates_errors(self):
        """Each of the five queries below must be rejected with RuntimeError."""
        parser = Parser()
        parser.set_language(JAVASCRIPT)

        rejected_queries = (
            """
            (
                (function_declaration
                    name: (identifier) @function-name
                )
                (#eq? @function-name @function-name fun1)
            )
            """,
            """
            (
                (function_declaration
                    name: (identifier) @function-name
                )
                (#eq? fun1 @function-name)
            )
            """,
            """
            (
                (function_declaration
                    name: (identifier) @function-name
                )
                (#match? @function-name @function-name fun1)
            )
            """,
            """
            (
                (function_declaration
                    name: (identifier) @function-name
                )
                (#match? fun1 @function-name)
            )
            """,
            """
            (
                (function_declaration
                    name: (identifier) @function-name
                )
                (#match? @function-name @function-name)
            )
            """,
        )

        for query_source in rejected_queries:
            with self.assertRaises(RuntimeError):
                JAVASCRIPT.query(query_source)
コード例 #11
0
ファイル: syntax_match.py プロジェクト: modit-team/MODIT
def corpus_syntax_match(references, candidates, lang):
    """Score how many reference sub-trees also occur in the candidate.

    For every candidate/reference pair both sources are parsed and the
    s-expressions of the root and of every descendant that has children are
    collected. The score is the number of reference sub-trees found among
    the candidate's sub-trees, divided by the total number of reference
    sub-trees.

    Args:
        references: Indexable by candidate position; each entry is an
            iterable of reference sources for that candidate.
        candidates: Sequence of candidate sources.
        lang: Language name to load from ``parser/my-languages.so``.

    Returns:
        ``match_count / total_count``, or ``0`` when no reference sub-tree
        was collected (for example an empty corpus).
    """
    JAVA_LANGUAGE = Language('parser/my-languages.so', lang)
    parser = Parser()
    parser.set_language(JAVA_LANGUAGE)
    match_count = 0
    total_count = 0

    def get_all_sub_trees(root_node):
        """Return ``[sexp, depth]`` for the root and each non-leaf descendant."""
        node_stack = [[root_node, 1]]
        sub_tree_sexp_list = []
        while node_stack:
            cur_node, cur_depth = node_stack.pop()
            sub_tree_sexp_list.append([cur_node.sexp(), cur_depth])
            for child_node in cur_node.children:
                if len(child_node.children) != 0:
                    node_stack.append([child_node, cur_depth + 1])
        return sub_tree_sexp_list

    for i, candidate in enumerate(candidates):
        references_sample = references[i]
        for reference in references_sample:
            # Comment stripping is best-effort: on failure the source is used
            # as it is.
            # NOTE(review): 'java' is passed regardless of `lang` - confirm
            # this is intended.
            try:
                candidate = remove_comments_and_docstrings(candidate, 'java')
            except Exception:
                pass
            try:
                reference = remove_comments_and_docstrings(reference, 'java')
            except Exception:
                pass

            candidate_tree = parser.parse(bytes(candidate, 'utf8')).root_node
            reference_tree = parser.parse(bytes(reference, 'utf8')).root_node

            # A set gives constant-time membership tests with the same result
            # as the former list lookup.
            cand_sexps = {sexp for sexp, _ in get_all_sub_trees(candidate_tree)}
            ref_sexps = get_all_sub_trees(reference_tree)

            for sub_tree, _ in ref_sexps:
                if sub_tree in cand_sexps:
                    match_count += 1
            total_count += len(ref_sexps)

    if total_count == 0:
        # Nothing to compare against; avoid dividing by zero.
        return 0
    return match_count / total_count
コード例 #12
0
def get_parser(lang: str) -> Parser:
    """
    Return the parser for a language, creating and caching it on first use.
    :param lang: language to use.
    :return: parser.
    """
    if lang in PARSERS:
        return PARSERS[lang]

    new_parser = Parser()
    new_parser.set_language(Language(get_tree_sitter_so(), lang))
    PARSERS[lang] = new_parser
    return new_parser
コード例 #13
0
 def test_set_language(self):
     """Parse with one parser object after switching its language."""
     python_sexp = "(module (function_definition (identifier) (parameters) (expression_statement (call (identifier) (argument_list)))))"
     javascript_sexp = "(program (function (identifier) (formal_parameters) (statement_block (expression_statement (call_expression (identifier) (arguments))))))"
     parser = Parser()

     parser.set_language(PYTHON)
     python_tree = parser.parse(b"def foo():\n  bar()")
     self.assertEqual(python_tree.root_node.sexp(), python_sexp)

     parser.set_language(JAVASCRIPT)
     javascript_tree = parser.parse(b"function foo() {\n  bar();\n}")
     self.assertEqual(javascript_tree.root_node.sexp(), javascript_sexp)
コード例 #14
0
ファイル: main.py プロジェクト: Krosent/gumtree-java-xml-gen
def main(file):
    """Parse a Java file with tree-sitter and print it as an XML document.

    The Java grammar under ``vendor/tree-sitter-java`` (relative to this
    module) is compiled into ``build/my-languages.so``. The parsed tree is
    converted with ``to_gumtree_node``/``process_node`` and appended to a
    ``<root>`` element of the module-level ``doc``, which is then printed.
    """
    this_directory = os.path.dirname(__file__)
    library_path = os.path.join(this_directory, 'build/my-languages.so')
    java_grammar_dir = os.path.join(this_directory, 'vendor/tree-sitter-java')

    # Compile the Java grammar into the library, then load it from there.
    Language.build_library(library_path, [java_grammar_dir])
    java_lang = Language(library_path, 'java')

    parser = Parser()
    parser.set_language(java_lang)
    tree_sitter_tree = parser.parse(read_file(file))

    gumtree_ast = to_gumtree_node(tree_sitter_tree.root_node)

    # Everything lives inside a single <root> element.
    root_node = doc.createElement('root')
    # An empty <context> element is added as well; the original author
    # noted it appears in the test case without a known reason.
    context_node = doc.createElement('context')

    doc.appendChild(root_node)
    root_node.appendChild(context_node)
    root_node.appendChild(gumtree_ast)

    # Recursively add the child nodes, if any.
    process_node(tree_sitter_tree.root_node, gumtree_ast)

    print(doc.toprettyxml())
コード例 #15
0
ファイル: java.py プロジェクト: WM-SEMERU/ds4se
def add_lcom5(df, col):
    """Add a ``class_lcom5`` column computed from each row's source contents.

    For every row the parser language is picked from the file extension of
    ``name``, ``contents`` is encoded with the row's ``encoding`` and parsed,
    and ``calculate_lcom5`` is called on the result.

    Rows are read positionally, so the frame does not need a default
    0..n-1 index.

    Args:
        df: Table with ``name``, ``encoding`` and ``contents`` columns.
        col: Unused; kept for interface compatibility.

    Returns:
        ``df`` with the new ``class_lcom5`` column.
    """
    lang_builds = create_parser_builds()
    parser = Parser()
    class_lcom5 = []

    for name, encoding, contents in zip(df["name"], df["encoding"], df["contents"]):
        ext = name.split('.')[-1]
        parser.set_language(lang_builds[ext])
        # Encode once and reuse the bytes for parsing and for the metric.
        source_bytes = bytes(contents, encoding)
        tree = parser.parse(source_bytes)
        class_lcom5.append(calculate_lcom5(tree, ext, source_bytes, name))
    df["class_lcom5"] = class_lcom5

    return df
コード例 #16
0
    def test_multibyte_characters(self):
        """Node byte offsets must slice multibyte UTF-8 source correctly."""
        parser = Parser()
        parser.set_language(JAVASCRIPT)
        source_code = "'😎' && '🐍'".encode("utf8")
        tree = parser.parse(source_code)

        binary_node = tree.root_node.children[0].children[0]
        snake_node = binary_node.children[2]

        self.assertEqual(binary_node.type, "binary_expression")
        self.assertEqual(snake_node.type, "string")

        snake_bytes = source_code[snake_node.start_byte:snake_node.end_byte]
        self.assertEqual(snake_bytes.decode('utf8'), "'🐍'")
コード例 #17
0
def jobs(repo_path, args):
    """Mine one repository's commits and append usable ones to the output file.

    Non-merge commits on ``master`` that touch files of the configured
    language are traversed. A commit is stored when its cleaned message has
    at least ``args.min_target_length`` tokens and ``get_code_diff`` reports
    between 1 and ``args.max_duplicate`` files. Stored commits are appended
    to ``args.output_file`` as JSON lines.

    Args:
        repo_path: Path of the repository; nothing is mined if it is missing.
        args: Options object; the attributes read here are ``tree_sitter``,
            ``lang``, ``max_commit_number``, ``min_target_length``,
            ``max_duplicate`` and ``output_file``.

    Returns:
        ``(n_file_per_commit, add_tokens_per_del_tokens)``: a ``Counter`` of
        file counts over stored commits and the list of added/deleted token
        ratios.
    """
    PARSER = Parser()
    PARSER.set_language(Language(args.tree_sitter, args.lang))

    n_file_per_commit = Counter()
    add_tokens_per_del_tokens = []

    if not os.path.exists(repo_path):
        return (n_file_per_commit, add_tokens_per_del_tokens)

    submodule = os.path.join(repo_path, '.gitmodules')
    if os.path.exists(submodule):
        os.remove(submodule)

    try:
        n_stored_commit = 0
        for commit in RepositoryMining(
            repo_path,
            only_no_merge=True,
            only_in_branch='master',
            only_modifications_with_file_types=language_ext[args.lang]
        ).traverse_commits():
            if n_stored_commit > args.max_commit_number:
                break

            cleaned_message = message_cleaner(commit.msg)
            if not cleaned_message:
                continue
            commit_tokens = tokenize_docstring_from_string(cleaned_message)

            if len(commit_tokens) < args.min_target_length:
                continue

            addeds, deleteds, n_files = get_code_diff(commit, PARSER, args)
            if not (1 <= n_files <= args.max_duplicate):
                continue

            with jsonlines.open(args.output_file, mode="a") as writer:
                writer.write(
                    {
                        "commit_tokens": commit_tokens,
                        "add_tokens": addeds[0],
                        "del_tokens": deleteds[0],
                    }
                )
            # A commit with no deleted tokens has no defined ratio. Skip the
            # ratio instead of letting ZeroDivisionError end the traversal.
            if len(deleteds[0]) > 0:
                add_tokens_per_del_tokens.append(
                    len(addeds[0]) / len(deleteds[0]))
            n_file_per_commit.update({n_files})
            n_stored_commit += 1
    except Exception:
        # Best-effort per repository: keep what was collected so far.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        pass

    return (n_file_per_commit, add_tokens_per_del_tokens)
コード例 #18
0
ファイル: subtree.py プロジェクト: fivejjs/infercode
def main(opt):
    """Parse ``opt.filename[0]`` and print the reports filled by ``print_subtree``.

    If a ``node_types_<language>.csv`` file exists, its lines (lower-cased)
    are passed to ``print_subtree`` as the selected node types.

    Args:
        opt: Options object; ``opt.language[0]`` and ``opt.filename[0]`` are
            read.
    """
    parser = Parser()
    lang = Languages.get(opt.language[0])
    parser.set_language(lang)

    lang_node_types_filename = "node_types_{}.csv".format(opt.language[0])
    selected_node_types = {}
    if exists(lang_node_types_filename):
        # Close the file deterministically instead of leaking the handle.
        with open(lang_node_types_filename, "r") as node_types_file:
            lang_node_types = node_types_file.read().splitlines()
        for lang_node_type in lang_node_types:
            selected_node_types[lang_node_type.lower()] = 1

    with open(opt.filename[0], "rb") as source_file:
        data = source_file.read()
    tree = parser.parse(data)

    reports = {}
    # Only the `reports` dict is used here; the return value is not needed.
    print_subtree(data, tree.root_node, reports, selected_node_types)
    for report in reports.values():
        print(report)
0
def get_parser(lang: str, so_path: str = None) -> Parser:
    """Create a new parser for ``lang``.

    :param lang: language name to load from the compiled library.
    :param so_path: path of the compiled library; ``SO_PATH`` is used when
        it is ``None``.
    :return: a new parser with the language set.
    """
    library_path = SO_PATH if so_path is None else so_path

    # Caching in a module-level PARSERS dict was sketched here in
    # commented-out form; a fresh parser is built on every call.
    grammar = Language(library_path, lang)

    fresh_parser = Parser()
    fresh_parser.set_language(grammar)
    return fresh_parser
コード例 #20
0
    def test_byte_range_captures(self):
        """Check the first capture returned for the byte range 10..20."""
        source = b"def foo():\n  bar()\ndef baz():\n  quux()\n"
        query_source = """
            (function_definition name: (identifier) @func-def)
            (call function: (identifier) @func-call)
            """
        parser = Parser()
        parser.set_language(PYTHON)
        tree = parser.parse(source)
        query = PYTHON.query(query_source)

        captures = query.captures(tree.root_node, start_byte=10, end_byte=20)
        first_capture = captures[0]
        self.assertEqual(first_capture[0].start_point, (1, 2))
        self.assertEqual(first_capture[0].end_point, (1, 5))
        self.assertEqual(first_capture[1], "func-call")
コード例 #21
0
def codebleu(reference, candidate, weights=[0.1, 0.1, 0.4, 0.4]):
    """Combine four metrics into one weighted score.

    The metrics are, in weight order: ``pure_bleu``, ``weighted_bleu``,
    ``ast_match`` and ``dfg_match``. A metric equal to ``-1`` is left out,
    and the result is normalised by the sum of the weights actually used.
    """
    parser = Parser()
    PY_LANGUAGE = Language('./my-languages.so', 'python')
    parser.set_language(PY_LANGUAGE)
    lattice = TypeLatticeGenerator('typingRules.json')

    bleu_score = pure_bleu(reference, candidate)
    weighted_bleu_score = weighted_bleu(reference, candidate)
    ast_score = ast_match(reference, candidate, parser)
    dfg_score = dfg_match(reference, candidate, lattice)
    metrics = (bleu_score, weighted_bleu_score, ast_score, dfg_score)

    weighted_sum = 0.0
    used_weight = 0.0
    for position, metric in enumerate(metrics):
        # -1 marks a metric that could not be computed; leave it out.
        if metric != -1:
            weighted_sum += metric * weights[position]
            used_weight += weights[position]
    return weighted_sum / used_weight
コード例 #22
0
ファイル: main.py プロジェクト: JetBrains-Research/astminer
class TreeSitterLauncher:
    """Parse source files with tree-sitter and return them as a tree dict."""

    # Parser configured with the grammar chosen in __init__.
    _parser: Parser

    def __init__(self, language, library_path):
        """Load ``language`` from the library at ``library_path``."""
        grammar = Language(library_path, language)
        self._parser = Parser()
        self._parser.set_language(grammar)

    def _get_code_bytes(self, filepath: str) -> bytes:
        """Read ``filepath`` as text and return its contents encoded as UTF-8."""
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): the file is decoded with the platform default
        # encoding before being re-encoded as UTF-8 - confirm this is
        # intended.
        with open(filepath, "r") as file:
            return bytes(file.read(), "utf-8")

    def parse_file(self, filepath: str) -> TreeAsDict:
        """Parse ``filepath`` and return the result of ``TreeBuilder``."""
        code_bytes = self._get_code_bytes(filepath)
        tree_sitter_tree = self._parser.parse(code_bytes)
        cursor = tree_sitter_tree.walk()
        return TreeBuilder(cursor, code_bytes).get_tree_as_dict()
コード例 #23
0
    def test_walk(self):
        """Step a tree cursor through the first nodes of a small function."""
        parser = Parser()
        parser.set_language(PYTHON)
        tree = parser.parse(b"def foo():\n  bar()")
        cursor = tree.walk()

        def check_whole_source_node(expected_type):
            # Shared checks for the two nodes that span bytes 0..18.
            self.assertEqual(cursor.node.type, expected_type)
            self.assertEqual(cursor.node.start_byte, 0)
            self.assertEqual(cursor.node.end_byte, 18)
            self.assertEqual(cursor.node.start_point, (0, 0))
            self.assertEqual(cursor.node.end_point, (1, 7))
            self.assertEqual(cursor.current_field_name(), None)

        # Node always returns the same instance
        self.assertIs(cursor.node, cursor.node)
        check_whole_source_node("module")

        self.assertTrue(cursor.goto_first_child())
        check_whole_source_node("function_definition")

        self.assertTrue(cursor.goto_first_child())
        def_node = cursor.node
        self.assertEqual(def_node.type, "def")
        self.assertEqual(def_node.is_named, False)
        self.assertEqual(def_node.sexp(), '("def")')
        self.assertEqual(cursor.current_field_name(), None)

        # Node remains cached after a failure to move
        self.assertFalse(cursor.goto_first_child())
        self.assertIs(cursor.node, def_node)

        self.assertTrue(cursor.goto_next_sibling())
        identifier_node = cursor.node
        self.assertEqual(identifier_node.type, "identifier")
        self.assertEqual(identifier_node.is_named, True)
        self.assertEqual(cursor.current_field_name(), "name")
        self.assertFalse(cursor.goto_first_child())

        self.assertTrue(cursor.goto_next_sibling())
        parameters_node = cursor.node
        self.assertEqual(parameters_node.type, "parameters")
        self.assertEqual(parameters_node.is_named, True)
        self.assertEqual(cursor.current_field_name(), "parameters")
コード例 #24
0
ファイル: __init__.py プロジェクト: sebastien/cells
class TSParser:
    """Wrapper around a tree-sitter parser for a single language."""

    def __init__(self, lang: str):
        """Load ``lang`` from ``LIBRARY_PATH`` and configure the parser."""
        self.lang = lang
        self.parser = TSBaseParser()
        self.tsLang = Language(LIBRARY_PATH, lang)
        self.parser.set_language(self.tsLang)

    def parse(self, code: str) -> Node:
        """Parse ``code`` and return the root node."""
        tree = self(code)
        return tree.root_node

    def sexp(self, code: str) -> str:
        """Return the s-expression of the root node of ``code``."""
        root = self.parse(code)
        return root.sexp()

    def query(self, query: str, code: str) -> dict[str, str]:
        """Run ``query`` on ``code``; map each capture name to ``extract``'s result.

        When a name is captured more than once, the last capture wins.
        """
        compiled_query = self.tsLang.query(query)
        captures = compiled_query.captures(self.parse(code))
        return {name: extract(node, code) for node, name in captures}

    def __call__(self, value: str) -> Tree:
        """Parse ``value`` (encoded as UTF-8) and return the tree."""
        encoded = bytes(value, "utf8")
        return self.parser.parse(encoded)
コード例 #25
0
    def test_children(self):
        """Check the root node, the function node and the function's children."""
        parser = Parser()
        parser.set_language(PYTHON)
        tree = parser.parse(b"def foo():\n  bar()")

        def check_whole_source_node(node, expected_type):
            # Shared checks for the two nodes that span bytes 0..18.
            self.assertEqual(node.type, expected_type)
            self.assertEqual(node.start_byte, 0)
            self.assertEqual(node.end_byte, 18)
            self.assertEqual(node.start_point, (0, 0))
            self.assertEqual(node.end_point, (1, 7))

        root_node = tree.root_node
        check_whole_source_node(root_node, "module")

        # List object is reused
        self.assertIs(root_node.children, root_node.children)

        fn_node = root_node.children[0]
        check_whole_source_node(fn_node, "function_definition")

        # (type, is_named) of the function node's first five children.
        expected_children = (
            ("def", False),
            ("identifier", True),
            ("parameters", True),
            (":", False),
            ("block", True),
        )
        for position, (expected_type, expected_named) in enumerate(expected_children):
            child = fn_node.children[position]
            self.assertEqual(child.type, expected_type)
            self.assertEqual(child.is_named, expected_named)

        # The identifier has no children of its own.
        self.assertEqual(len(fn_node.children[1].children), 0)
コード例 #26
0
    def test_multibyte_characters_via_read_callback(self):
        """Parse multibyte source delivered one byte at a time by a callback."""
        parser = Parser()
        parser.set_language(JAVASCRIPT)
        source_code = "'😎' && '🐍'".encode("utf8")

        def read_one_byte(offset, _point):
            # Hand back a single byte starting at the requested offset.
            return source_code[offset:offset + 1]

        tree = parser.parse(read_one_byte)
        binary_node = tree.root_node.children[0].children[0]
        snake_node = binary_node.children[2]

        self.assertEqual(binary_node.type, "binary_expression")
        self.assertEqual(snake_node.type, "string")

        snake_bytes = source_code[snake_node.start_byte:snake_node.end_byte]
        self.assertEqual(snake_bytes.decode("utf8"), "'🐍'")
コード例 #27
0
    def test_tree(self):
        """Use nodes after the tree that produced them is out of local scope."""
        code = b"def foo():\n  bar()\n\ndef foo():\n  bar()"
        parser = Parser()
        parser.set_language(PYTHON)

        def parse_root(bytes_):
            # Only the root node leaves this function.
            return parser.parse(bytes_).root_node

        def parse_root_children(bytes_):
            # Only the list of children leaves this function.
            return parser.parse(bytes_).root_node.children

        root = parse_root(code)
        for child in root.children:
            self.assertIsNotNone(child.is_named)

        children = parse_root_children(code)
        for child in children:
            self.assertIsNotNone(child.is_named)
コード例 #28
0
    def test_edit(self):
        """Edit a tree, check which nodes report changes, then re-parse."""
        parser = Parser()
        parser.set_language(PYTHON)
        tree = parser.parse(b"def foo():\n  bar()")

        # Describe an insertion of two bytes right after "def foo(".
        edit_offset = len(b"def foo(")
        edit_end = edit_offset + 2
        tree.edit(
            start_byte=edit_offset,
            old_end_byte=edit_offset,
            new_end_byte=edit_end,
            start_point=(0, edit_offset),
            old_end_point=(0, edit_offset),
            new_end_point=(0, edit_end),
        )

        fn_node = tree.root_node.children[0]
        self.assertEqual(fn_node.type, "function_definition")
        self.assertTrue(fn_node.has_changes)
        for unchanged_index in (0, 1, 3):
            self.assertFalse(fn_node.children[unchanged_index].has_changes)

        params_node = fn_node.children[2]
        self.assertEqual(params_node.type, "parameters")
        self.assertTrue(params_node.has_changes)
        self.assertEqual(params_node.start_point, (0, edit_offset - 1))
        self.assertEqual(params_node.end_point, (0, edit_offset + 3))

        # Re-parse the edited source, passing the edited tree as well.
        new_tree = parser.parse(b"def foo(ab):\n  bar()", tree)
        self.assertEqual(
            new_tree.root_node.sexp(),
            trim(
                """(module (function_definition
                name: (identifier)
                parameters: (parameters (identifier))
                body: (block
                    (expression_statement (call
                        function: (identifier)
                        arguments: (argument_list))))))"""
            ),
        )
コード例 #29
0
 def __init__(self,
              code,
              language='python',
              tree_style='SPT',
              path_style='L2L'):
     """Parse ``code`` and set up the tree, terminals and empty node lists.

     ``tree_style`` is one of AST | SPT || HST | HPT and ``path_style`` one
     of L2L | UD | U2D (per the original notes). The grammar is loaded from
     the library at ``../build/csn.so``; the ``Language.build_library`` call
     that would compile it (go, java, javascript, php, python, ruby) was
     commented out in the original.
     """
     self.tree_style = tree_style
     self.path_style = path_style

     csn_so = '../build/csn.so'
     parser = Parser()
     # Language is one of: 'go', 'java', 'javascript', 'php', 'python', 'ruby'
     parser.set_language(Language(csn_so, language))
     tree = parser.parse(code.encode())
     code_lines = code.split('\n')

     traversal = self.traverse(tree, code_lines)
     self.root, self.terminals, self.num_eldest = traversal

     self.terminal_nodes = []
     self.nonterminal_nodes = []
     self.leafpath_terminal_nodes = []
     self.leafpath_nonterminal_nodes = []
     self.rootpath_terminal_nodes = []
     self.rootpath_nonterminal_nodes = []

     self.debug = False
     if self.debug:
         print(f'{"@" * 9}code\n{code}')
         print(f'{"@" * 9}sexp\n{tree.root_node.sexp()}')
コード例 #30
0
def corpus_dataflow_match(references, candidates, lang):
    """Score how many reference data-flows also occur in the candidate.

    For every candidate/reference pair the data-flow of both sources is
    extracted with ``get_data_flow`` and normalised. Each reference
    data-flow found in the candidate counts as one match and is removed
    from the candidate's list, so it cannot be matched twice.

    Args:
        references: Indexable by candidate position; each entry is an
            iterable of reference sources for that candidate.
        candidates: Sequence of candidate sources.
        lang: Language name to load from ``parser/my-languages.so`` and the
            key used to look up ``dfg_function``.

    Returns:
        ``match_count / total_count``, or ``0`` (after printing a warning)
        when no reference data-flow was extracted.
    """
    LANGUAGE = Language('parser/my-languages.so', lang)
    ts_parser = Parser()
    ts_parser.set_language(LANGUAGE)
    # get_data_flow receives the parser together with the language's
    # data-flow function.
    parser = [ts_parser, dfg_function[lang]]
    match_count = 0
    total_count = 0

    for i, candidate in enumerate(candidates):
        references_sample = references[i]
        for reference in references_sample:
            # Comment stripping is best-effort: on failure the source is used
            # as it is. Only ordinary exceptions are ignored, so
            # KeyboardInterrupt/SystemExit still propagate.
            # NOTE(review): 'java' is passed regardless of `lang` - confirm
            # this is intended.
            try:
                candidate = remove_comments_and_docstrings(candidate, 'java')
            except Exception:
                pass
            try:
                reference = remove_comments_and_docstrings(reference, 'java')
            except Exception:
                pass

            cand_dfg = get_data_flow(candidate, parser)
            ref_dfg = get_data_flow(reference, parser)

            normalized_cand_dfg = normalize_dataflow(cand_dfg)
            normalized_ref_dfg = normalize_dataflow(ref_dfg)

            if len(normalized_ref_dfg) > 0:
                total_count += len(normalized_ref_dfg)
                for dataflow in normalized_ref_dfg:
                    if dataflow in normalized_cand_dfg:
                        match_count += 1
                        # Consume the match so it is not counted again.
                        normalized_cand_dfg.remove(dataflow)
    if total_count == 0:
        print(
            "WARNING: There is no reference data-flows extracted from the whole corpus, and the data-flow match score degenerates to 0. Please consider ignoring this score."
        )
        return 0
    score = match_count / total_count
    return score