Ejemplo n.º 1
0
def code_repr(src: str):
    """
    Parse a source string and return the resulting node tree.

    A trailing newline is appended to ``src`` before it is handed to the
    parser. Example given by the original author:

    src = 'path.to.api'
    output = "Node(power, [Leaf(1, 'path'), Node(trailer, [Leaf(23, '.'), Leaf(1, 'to')]), Node(trailer, [Leaf(23, '.'), Leaf(1, 'api')])])"

    Note that the tree object itself is returned, not its string form.
    """
    parser = driver.Driver(python_grammar, convert=pytree.convert)
    stream = StringIO(src + '\n')
    return parser.parse_stream(stream)
Ejemplo n.º 2
0
def main():
    """Parse ``example.py`` and, optionally, many more files, reporting diffs.

    The grammar is loaded from ``Grammar.txt`` and every file is parsed with
    ``debug=True``. ``diff(fn, tree)`` returning a truthy value is treated as
    a problem with that file.

    With no command-line arguments only ``example.py`` is checked. With any
    argument, every already-imported module and every top-level ``.py`` file
    found in the ``sys.path`` directories is checked too, and a summary of
    the problem files is printed at the end.
    """
    gr = driver.load_grammar("Grammar.txt")
    dr = driver.Driver(gr, convert=pytree.convert)

    fn = "example.py"
    tree = dr.parse_file(fn, debug=True)
    if not diff(fn, tree):
        print("No diffs.")
    if not sys.argv[1:]:
        return  # Pass a dummy argument to run the complete test suite below

    problems = []

    # Process every imported module. Iterate over a snapshot so that an
    # import triggered while looping cannot raise
    # "dictionary changed size during iteration".
    for mod in list(sys.modules.values()):
        # __file__ may be missing (e.g. built-in modules) or None
        # (e.g. namespace packages); neither can be parsed from disk.
        fn = getattr(mod, "__file__", None)
        if fn is None:
            continue
        if fn.endswith(".pyc"):
            fn = fn[:-1]  # "module.pyc" -> "module.py"
        if not fn.endswith(".py"):
            continue
        print("Parsing", fn, file=sys.stderr)
        tree = dr.parse_file(fn, debug=True)
        if diff(fn, tree):
            problems.append(fn)

    # Process every single module on sys.path (but not in packages)
    for dirname in sys.path:
        try:
            names = os.listdir(dirname)
        except OSError:
            # Entry is not a readable directory; skip it.
            continue
        print("Scanning", dirname, "...", file=sys.stderr)
        for name in names:
            if not name.endswith(".py"):
                continue
            print("Parsing", name, file=sys.stderr)
            fn = os.path.join(dirname, name)
            try:
                tree = dr.parse_file(fn, debug=True)
            except pgen2.parse.ParseError as err:
                # Unparseable files are reported but not counted as problems.
                print("ParseError:", err)
            else:
                if diff(fn, tree):
                    problems.append(fn)

    # Show summary of problem files
    if not problems:
        print("No problems.  Congratulations!")
    else:
        print("Problems in following files:")
        for fn in problems:
            print("***", fn)
Ejemplo n.º 3
0
def parse_file(raw_blob):
    """Parse JSON-wrapped source code and return its tree as flat JSON.

    Args:
        raw_blob: JSON text of an object with a ``source_code`` string and a
            ``from_file`` value.

    Returns:
        A compact JSON string ``{"from_file": ..., "ast": [...]}``. ``ast`` is
        a list of node records in pre-order; each record has ``type``,
        ``value`` (the leaf's value, or the type name for inner nodes) and
        ``children`` (indices of the child records within the same list).
        Returns ``None`` if anything goes wrong (invalid JSON, missing key,
        parse failure, unknown node type).
    """
    try:
        as_json = json.loads(raw_blob)
        contents = as_json['source_code'] + '\n'
        from_file = as_json['from_file']

        parser = driver.Driver(pygram.python_grammar, convert=pytree.convert)

        # Work on a copy: token.tok_name is a shared module-level table and
        # must not be polluted with grammar symbol numbers.
        names_map = dict(token.tok_name)
        for key, value in pygram.python_grammar.symbol2number.items():
            names_map[value] = key

        the_ast = parser.parse_string(contents)
        flattened_json = []

        def _traverse(node):
            # The record's index is fixed before descending so that children
            # (appended later) can be referenced by position.
            cur_idx = len(flattened_json)
            if node.type not in names_map:
                # Explicit raise rather than assert: asserts vanish under -O,
                # which would silently produce a corrupt tree.
                raise ValueError("Type not in map.")
            type_name = names_map[node.type]
            is_leaf = isinstance(node, pytree.Leaf)
            record = {
                'type': type_name,
                'value': node.value if is_leaf else type_name,
                'children': []
            }
            flattened_json.append(record)
            if not is_leaf:
                for child in node.children:
                    record['children'].append(_traverse(child))
            return cur_idx

        _traverse(the_ast)

        final_tree = {'from_file': from_file, 'ast': flattened_json}

        return json.dumps(final_tree, separators=(',', ':'))
    except Exception:
        # Deliberately best-effort: callers receive None for any bad input.
        return None
Ejemplo n.º 4
0
 def setUp(self):
     """Store the Python grammar and build a driver for it on the instance."""
     grammar = pygram.python_grammar
     self.grammar = grammar
     self.driver = driver.Driver(grammar, convert=pytree.convert, logger=log)
Ejemplo n.º 5
0
def process(target):
    """Validate one source file and extract a record per function in it.

    The language comes from ``sys.argv[1]``; when ``sys.argv[2]`` is ``'gz'``
    the validation-failure diagnostics are suppressed.

    Args:
        target: dict with keys ``'language'``, ``'the_code'``,
            ``'from_file'`` and ``'split'``.

    Returns:
        ``(False, [])`` when the code fails validation for its language,
        otherwise ``(True, results)`` where ``results`` is a list of dicts,
        one per function that passed the reject filters and whose name could
        be removed from its token stream.
    """
    language = sys.argv[1]

    DataProcessor.PARSER.set_language(Language('/src/build/py-tree-sitter-languages.so', language))
    processor = DataProcessor(
        language=language,
        language_parser=LANGUAGE_METADATA[language]['language_parser']
    )

    results = []

    # Reject files that do not parse with the language's reference parser(s).
    if target['language'] == 'java':
        try:
            javalang.parse.parse(target['the_code'])
        except Exception as ex:
            if sys.argv[2] != 'gz':
                print('Failed to validate: ' + target['from_file'])
                print(target['the_code'])
                print(ex)
            return False, []
    elif target['language'] == 'python':
        try:
            parser = driver.Driver(pygram.python_grammar, convert=pytree.convert)
            parser.parse_string(target['the_code'].strip() + '\n')
            ast.parse(target['the_code'])
        except Exception:
            if sys.argv[2] != 'gz':
                print('Failed to validate: ' + target['from_file'])
            return False, []

    functions = processor.process_blob(target['the_code'])

    for function in functions:
        sha256 = hashlib.sha256(
            function["function"].strip().encode('utf-8')
        ).hexdigest()

        if target['language'] == 'java':
            if JAVA_REJECT_REGEX.search(function["function"]):
                continue
            if sha256 in BANNED_JAVA_SHAS:
                continue # Spoon transformer chokes on these, so exclude
        elif target['language'] == 'python':
            if PY_REJECT_REGEX.search(function["function"]):
                continue
            if sha256 in BANNED_PY_SHAS:
                continue # Spoon transformer chokes on these, so exclude

        try:
            # Only the last dotted component of the identifier is used.
            short_name = function["identifier"].split('.')[-1]
            tokens_pre, tokens_post = remove_func_name(
                short_name,
                function["function_tokens"]
            )
        except Exception:
            # Best-effort: skip functions whose name cannot be removed, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue

        results.append({
            "language": function["language"],
            "identifier": short_name,
            "target_tokens": subtokenize(short_name),
            "source_tokens": tokens_post,
            "elided_tokens": tokens_pre,
            # Java functions are wrapped in a class so they form a full unit.
            "source_code": function["function"] if function["language"] != "java" else (
                'class WRAPPER {\n' + function["function"] + '\n}\n'
            ),
            "sha256_hash": sha256,
            "split": target['split'],
            "from_file": target['from_file']
        })

    return True, results
Ejemplo n.º 6
0
from fissix.pgen2 import token as pgen2_token
from fissix.pygram import python_symbols
from fissix import patcomp

# it's handy to import this from here when developing
from bowler.helpers import print_tree as dumpTree

import os
from textwrap import dedent

# --------------------------------------------------------------------------------------------------
# load grammar & make a driver we can use to parse
# Grammar.txt is looked up in the directory of the imported fissix package.
fissix_dir = os.path.dirname( fissix.__file__ )
grammar_path = os.path.join( fissix_dir, "Grammar.txt" )
# Module-level grammar and the driver built on it; getGrammar() and
# parseString() below hand these out / use them.
m_grammar = pgen2_driver.load_grammar( grammar_path )
driver = pgen2_driver.Driver( m_grammar, convert=fissix.pytree.convert )

def parseString( string ):
    """Dedent ``string``, append two newlines and parse it with the module driver (debug on)."""
    source = dedent( string ) + "\n\n"
    return driver.parse_string( source, debug=True )

def getGrammar():
    """Return the module-level grammar object (``m_grammar``) loaded at import time."""
    return m_grammar


# --------------------------------------------------------------------------------------------------
def makeLeaf( type_name, value, prefix="" ):
    """Build a ``Leaf`` whose numeric type is looked up from ``type_name``.

    ``value`` and ``prefix`` are passed through to ``Leaf`` unchanged.
    """
    return Leaf( typeNameToNum( type_name ), value, prefix=prefix )

def makeStatement():
    """Return a new ``Node`` of type ``python_symbols.stmt`` with no children."""
    return Node( python_symbols.stmt, [] )
Ejemplo n.º 7
0
import unittest
import os
import os.path
from textwrap import dedent

# Local imports
from fissix import pytree, refactor
from fissix.pgen2 import driver as pgen2_driver

# Directory containing this module, and its (normalized) parent directory.
test_dir = os.path.dirname(__file__)
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
# Grammar.txt is expected one level above this module's directory.
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
grammar = pgen2_driver.load_grammar(grammar_path)
# Second grammar loaded from the same file, with "print" removed from its
# keyword table.
# NOTE(review): this assumes load_grammar returns a distinct object on each
# call, so the deletion does not affect `grammar` — confirm.
grammar_no_print_statement = pgen2_driver.load_grammar(grammar_path)
del grammar_no_print_statement.keywords["print"]
# One driver per grammar variant.
driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
driver_no_print_statement = pgen2_driver.Driver(
    grammar_no_print_statement, convert=pytree.convert
)


def parse_string(string):
    """Run ``string`` through ``reformat`` and parse the result with the module driver (debug on)."""
    source = reformat(string)
    return driver.parse_string(source, debug=True)


def run_all_tests(test_mod=None, tests=None):
    """Run a test suite with a verbose text runner.

    If ``tests`` is not given, the suite is loaded from ``test_mod`` using
    the default ``unittest`` loader. Nothing is returned.
    """
    suite = tests
    if suite is None:
        loader = unittest.TestLoader()
        suite = loader.loadTestsFromModule(test_mod)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite)