コード例 #1
0
def asttok(code_str: str, tree: ast.Module) -> asttokens.ASTTokens:
    """Build an ``ASTTokens`` object for source text and its parsed tree.

    Args:
        code_str: Source text handed to ``asttokens.ASTTokens``.
        tree: Module node handed to ``asttokens.ASTTokens`` as ``tree``.

    Returns:
        The ``asttokens.ASTTokens`` instance built from both arguments.
    """
    annotated = asttokens.ASTTokens(code_str, tree=tree)
    return annotated
コード例 #2
0
ファイル: pyparser.py プロジェクト: stspbu/code-change-miner
def parse_file(filename):
    """Parse a Python file into a flat, index-linked list of node dicts.

    The file content is obtained from ``read_file_to_string`` and parsed via
    ``asttokens.ASTTokens``.  Every visited AST node, plus a few synthetic
    list / identifier nodes, becomes one dict appended to the result list;
    a node refers to its children by their positions in that list.

    Each dict holds:
        type: AST class name (with expression-context / operator class names
            appended), or a synthetic label such as ``'body'`` or ``'attr'``.
        value: identifier or literal carried by the node (only some nodes).
        lineno, col, end_line_no, end_col: positions taken from the node's
            first and last token, stored as strings.
        children: list of child indices, present only when non-empty.

    Args:
        filename: Path handed to ``read_file_to_string``.

    Returns:
        The list of node dicts; index 0 is the root of the parsed tree.
    """
    tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree

    json_tree = []

    def localize(node, json_node):
        """Copy the first/last token positions of *node* into *json_node*."""
        json_node['lineno'] = str(node.first_token.start[0])
        json_node['col'] = str(node.first_token.start[1])
        json_node['end_line_no'] = str(node.last_token.end[0])
        json_node['end_col'] = str(node.last_token.end[1])

    def gen_identifier(identifier, node_type='identifier', node=None):
        """Append a synthetic leaf holding *identifier*; return its index.

        The leaf borrows its position from *node*.
        """
        pos = len(json_tree)
        json_node = {}
        json_tree.append(json_node)
        json_node['type'] = node_type
        json_node['value'] = identifier
        localize(node, json_node)
        return pos

    def traverse_list(l, node_type='list', node=None):
        """Append a synthetic node grouping the items of *l*; return its index.

        The grouping node borrows its position from *node*.
        """
        pos = len(json_tree)
        json_node = {}
        json_tree.append(json_node)
        json_node['type'] = node_type
        localize(node, json_node)
        children = [traverse(item) for item in l]
        if children:
            json_node['children'] = children
        return pos

    def traverse(node):
        """Append *node* and, recursively, its descendants; return its index."""
        pos = len(json_tree)
        json_node = {}
        json_tree.append(json_node)
        node_kind = type(node).__name__
        json_node['type'] = node_kind
        localize(node, json_node)
        children = []

        # Attach the node's own identifier / literal value, if it has one.
        if isinstance(node, ast.Name):
            json_node['value'] = node.id
        elif isinstance(node, ast.Constant) or node_kind == 'NameConstant':
            # ast.Constant covers every literal on Python 3.8+.  The legacy
            # classes are matched by name only, because ast.NameConstant,
            # ast.Num and ast.Str no longer exist on Python 3.14+.
            json_node['value'] = node.value
        elif node_kind == 'Num':
            json_node['value'] = node.n
        elif node_kind == 'Str':
            json_node['value'] = node.s
        elif isinstance(node, ast.alias):
            json_node['value'] = node.name
            if node.asname:
                children.append(gen_identifier(node.asname, node=node))
        elif isinstance(node, ast.FunctionDef):
            json_node['value'] = node.name
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                json_node['value'] = node.name
        elif isinstance(node, ast.ClassDef):
            json_node['value'] = node.name
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                json_node['value'] = node.module
        elif isinstance(node, ast.Global):
            for n in node.names:
                children.append(gen_identifier(n, node=node))
        elif isinstance(node, (ast.keyword, ast.arg)):
            json_node['value'] = node.arg

        # Process children.
        if isinstance(node, ast.For):
            children.append(traverse(node.target))
            children.append(traverse(node.iter))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, (ast.If, ast.While)):
            children.append(traverse(node.test))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.With):
            children.append(traverse_list(node.items, 'items', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.withitem):
            children.append(traverse(node.context_expr))
            if node.optional_vars:
                children.append(traverse(node.optional_vars))
        elif isinstance(node, ast.Try):
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.handlers, 'handlers', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
            if node.finalbody:
                children.append(
                    traverse_list(node.finalbody, 'finalbody', node))
        elif isinstance(node, ast.arguments):
            children.append(traverse_list(node.args, 'args', node))
            children.append(traverse_list(node.defaults, 'defaults', node))
            # NOTE(review): keyword-only args and their defaults are also
            # labelled 'defaults'; this looks like a copy-paste leftover but
            # is kept so the emitted node types do not change -- confirm.
            children.append(traverse_list(node.kwonlyargs, 'defaults', node))
            # kw_defaults holds None for keyword-only arguments without a
            # default; None has no tokens to localize, so skip those slots.
            kw_defaults = [d for d in node.kw_defaults if d is not None]
            children.append(traverse_list(kw_defaults, 'defaults', node))
            if node.vararg:
                children.append(
                    gen_identifier(node.vararg.arg, 'vararg', node.vararg))
            if node.kwarg:
                children.append(
                    gen_identifier(node.kwarg.arg, 'kwarg', node.kwarg))
        elif isinstance(node, ast.ExceptHandler):
            if node.type:
                children.append(traverse_list([node.type], 'type', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.ClassDef):
            children.append(traverse_list(node.bases, 'bases', node))
            children.append(traverse_list(node.body, 'body', node))
            children.append(
                traverse_list(node.decorator_list, 'decorator_list', node))
        elif isinstance(node, ast.FunctionDef):
            children.append(traverse(node.args))
            children.append(traverse_list(node.body, 'body', node))
            children.append(
                traverse_list(node.decorator_list, 'decorator_list', node))
        else:
            # Default handling: iterate over children.
            folded_kinds = (ast.expr_context, ast.operator, ast.boolop,
                            ast.unaryop, ast.cmpop)
            for child in ast.iter_child_nodes(node):
                if isinstance(child, folded_kinds):
                    # Fold expression contexts and operators into the parent
                    # type name instead of creating a child node.
                    json_node['type'] = (
                        json_node['type'] + type(child).__name__)
                else:
                    children.append(traverse(child))

        if isinstance(node, ast.Attribute):
            children.append(gen_identifier(node.attr, 'attr', node))

        if children:
            json_node['children'] = children

        return pos

    traverse(tree)
    return json_tree
コード例 #3
0
	def __init__(self, source: str):
		"""Keep the source text and build its token-annotated AST.

		Args:
			source: Code handed to ``asttokens.ASTTokens`` with ``parse=True``.
		"""
		# Raw text exactly as received.
		self.source = source
		# Parsed, token-annotated view of ``source``.
		self.tokens = asttokens.ASTTokens(source, parse=True)
		# Starts empty; annotated as ((int, int), str) pairs.
		self.replacements: List[Tuple[Tuple[int, int], str]] = []
コード例 #4
0
ファイル: statement_parser.py プロジェクト: Suvodeep90/SLACC
 def parse(self):
     """Tokenize the file at ``self.file_path`` and visit its AST.

     The text comes from ``cache.read_file``; the resulting ``ASTTokens``
     object is stored on ``self.ast_tokenized``.  ``set_root_method`` runs
     before the visit, and the visit receives a meta dict whose ``"method"``
     entry is ``self._root_method``.
     """
     text = cache.read_file(self.file_path)
     self.ast_tokenized = asttokens.ASTTokens(text, parse=True)
     self.set_root_method()
     visit_meta = {"method": self._root_method}
     self.visit(self.ast_tokenized.tree, visit_meta)
コード例 #5
0
 def _get_text(self, code):
     """Return the text that ``asttokens`` reports for this node.

     ``self`` is passed to ``ASTTokens`` as the tree for ``code`` and then
     to ``get_text``.
     """
     return asttokens.ASTTokens(code, tree=self).get_text(self)
コード例 #6
0
ファイル: parse.py プロジェクト: wo0dyn/raincoat
 def load(self):
     """Parse ``self.source``, visit its tree and return ``self.nodes``.

     ``self.nodes`` is reset to an empty dict first, and the ``ASTTokens``
     object is kept on ``self.marked_ast``.
     """
     self.nodes = {}
     marked = asttokens.ASTTokens(self.source, parse=True)
     self.marked_ast = marked
     self.visit(marked.tree)
     return self.nodes
コード例 #7
0
def loop(clauses, meta_data, back_end, code_object, *args, **kwargs):
    """
    From the docs:
    The loop construct can describe what type of parallelism to use to
    execute the loop and declare private variables and arrays and reduction
    operations.

    Allowable clauses are:
    - collapse( n )
    - gang [( gang-arg-list )]
    - worker [( [num:]int-expr )]
    - vector [( [length:]int-expr )]
    - seq
    - auto
    - tile( size-expr-list )
    - device_type( device-type-list )
    - independent
    - private( var-list )
    - reduction( operator:var-list )

        Where gang-arg is one of:
        - [num:]int-expr
        - static:size-expr
        and gang-arg-list may have at most one num and one static argument,
        and where size-expr is one of:
        - *
        - int-expr

    Restrictions:
    - Only the collapse, gang, worker, vector, seq, auto and tile clauses may
      follow a device_type clause.
    - The int-expr argument to the worker and vector clauses must be
      invariant in the kernels region.
    - A loop associated with a loop construct that does not have a seq
      clause must be written such that the loop iteration count is
      computable when entering the loop construct.

    Args:
        clauses: Clause collection handed to ``_apply_clause``.
        meta_data: Object that receives the region source/variables and the
            caller/function lookup results computed here.
        back_end: Object whose ``for_loop`` produces the new source.
        code_object: Object exposing ``src``; it is rebound to whatever
            ``_apply_clause`` returns on each iteration.
        *args: Accepted but not used by this function.
        **kwargs: Accepted but not used by this function.

    Returns:
        A ``Code`` instance wrapping the source returned by
        ``back_end.for_loop``.
    """
    # Apply clauses one after another until _apply_clause reports -1.
    index = 0
    while index != -1:
        index, code_object = _apply_clause(index, clauses, code_object,
                                           meta_data, back_end)

    # TODO: This is proof of concept stuff

    atok = asttokens.ASTTokens(code_object.src, parse=True)
    v = loop_visitor(atok)
    v.visit(atok.tree)

    meta_data.region_source = v.loop_code
    meta_data.region_vars = set(v.loop_vars)
    frame = meta_data.stackframe[0]  # In 3.5, this can be stackframe.frame
    func_names = util.get_function_names_from_source(code_object.src,
                                                     meta_data.funcs_name)

    meta_data.callers_mods = util.get_modules_from_stackframe(frame)
    meta_data.callers_funcs = util.get_functions_from_stackframe(
        frame, func_names)
    meta_data.funcs_funcs = util.get_functions_from_module(
        meta_data.funcs_module, func_names)
    meta_data.funcs_mods = util.get_modules_from_module(meta_data.funcs_module)

    new_source = back_end.for_loop(code_object, meta_data)
    return Code(new_source)
コード例 #8
0
ファイル: example_helper.py プロジェクト: codecamp/protowhat
 def get_text(self, full_text=None):
     """Return the text that ``asttokens`` reports for this node.

     ``self`` is passed to ``ASTTokens`` as the tree for ``full_text`` and
     then to ``get_text``.
     """
     return asttokens.ASTTokens(full_text, tree=self).get_text(self)
コード例 #9
0
def collect_data(filename: str,
                 args: argparse.ArgumentParser) -> List[List[str]]:
    """
    Collect per-function code, tokens, comments and docstrings from a file.

    The file is first rewritten in place by ``2to3``, then read, parsed with
    ``asttokens`` and scanned for ``ast.FunctionDef`` nodes.  Functions whose
    scope (positional argument names plus assigned names) has fewer than two
    entries are skipped.
    ---
    Args:
        filename: Path of the Python file to process.
        args: Not used by this function.
    Returns:
        data: list or None
            One ``[filename, function_code, tokens, comments, docstring]``
            row per kept function; ``None`` when the file could not be read
            or parsed.
        is_appropriate: bool
            A flag indicating that the file is appropriate
            (at least one row was produced and no read/parse error occurred).

    NOTE: the return annotation does not describe the ``(data,
    is_appropriate)`` tuple that is actually returned; the signature is left
    unchanged here.
    """
    global error_counter

    # Convert Python 2 to Python 3.
    # NOTE(review): the 2to3 path is specific to one machine -- consider
    # making it configurable.
    run(["/home/marat/anaconda3/envs/scs-ext/bin/2to3", filename, "-w", "-n"],
        stdout=DEVNULL,
        stderr=STDOUT)
    print("Building AST tree from a filename:", filename)

    # Best effort: any ordinary failure marks the file as unusable, but
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        code = read_file_to_string(filename)
    except Exception:
        print("File with bad encoding:", filename)
        error_counter += 1
        return None, False

    # let's replace tabs for spaces in the future
    code = code.replace('\t', ' ' * 4)

    code_lines = code.splitlines()

    try:
        atok = asttokens.ASTTokens(code, parse=True)
        astree = atok.tree
    except Exception:
        print("Files with an error:", error_counter)
        error_counter += 1
        return None, False

    data = []

    # Global loop: iterating over functions from file
    for fun in ast.walk(astree):
        if not (isinstance(fun, ast.FunctionDef) and fun.body):
            continue

        fun_begin = fun.first_token.startpos
        fun_end = fun.last_token.endpos
        prev_comment = get_previous_comments(fun, code_lines)
        # NOTE(review): this docstring value is overwritten by the one
        # returned from get_tokens() below -- confirm which is intended.
        docstring = ast.get_docstring(fun)
        if not docstring:
            docstring = ""
        else:
            docstring = DOCSTRING_PREFIX + docstring + "\n"

        # Forming scope -- set of node ids (variables)
        scope = {arg.arg for arg in fun.args.args}
        scope.update(
            node.id for node in ast.walk(fun)
            if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store))

        if len(scope) < 2:
            continue

        function_code = code[fun_begin:fun_end]

        # Drop anything before the ``def`` keyword (e.g. decorators such as
        # @classmethod).  Anchoring on a line-leading ``def`` keeps decorator
        # names that merely contain "def" from being mistaken for it.
        def_match = re.search(r'^[ \t]*(def)\b', function_code, re.MULTILINE)
        if def_match:
            function_code = function_code[def_match.start(1):]

        function_code, tokens, comments, docstring, _stopwords_count, \
            is_tokenizable = get_tokens(function_code)

        if not is_tokenizable:
            error_counter += 1
            function_code = ""
            tokens = []

        if len(prev_comment) > 0:
            comments = [prev_comment] + comments

        data.append([filename, function_code, tokens, comments, docstring])

    is_appropriate = len(data) > 0
    return data, is_appropriate
コード例 #10
0
def _collapse(index, clause_list, code_object, meta_data, back_end):
    """
    The 'collapse' clause is used to specify how many tightly nested loops
    are associated with the 'loop' construct. The argument to the 'collapse'
    clause must be a constant positive integer expression. If no 'collapse'
    clause is present, only the immediately following loop is associated
    with the 'loop' construct.

    If more than one loop is associated with the 'loop' construct, the
    iterations of all the associated loops are all scheduled according
    to the rest of the clauses. The trip count for all loops associated
    with the 'collapse' clause must be computable and invariant in all
    the loops.

    It is implementation-defined whether a 'gang', 'worker' or 'vector'
    clause on the construct is applied to each loop, or to the
    linearized iteration space.

    Args:
        index: Not used by the current implementation.
        clause_list: Not used by the current implementation.
        code_object: Object exposing ``src``; returned unchanged.
        meta_data: Not used by the current implementation.
        back_end: Not used by the current implementation.

    Returns:
        The tuple ``(-1, code_object)``.
    """

    #TODO: This one's easy enough: just make sure that the number of iterations
    #      on each of the loops is invariant and countable and then set in the
    #      code_object a value to tell it which loops are talked about by the
    #      rest of the clauses.

    # for node in tree:
    #   if node is not a loop:
    #       break
    #   elif not loop is invariant and countable:
    #       break
    #   else:
    #       num_loops += 1
    # if num_loops != n:
    #   raise some sort of error that explains how many and which loops were
    #   found, and that you want n loops collapsed, but we could only
    #   guarantee num_loops
    #
    # code_object.num_loops = num_loops

    class _visitor(ast.NodeVisitor):
        """Placeholder visitor; the per-node handling is still TODO."""

        def __init__(self, atok):
            self.atok = atok
            self._seen = set()

        def generic_visit(self, node):
            type_name = type(node).__name__

            if type_name == "comprehension":
                # TODO
                pass
            elif type_name == "For":
                # TODO
                pass
            ast.NodeVisitor.generic_visit(self, node)

    atok = asttokens.ASTTokens(code_object.src, parse=True)
    v = _visitor(atok)
    v.visit(atok.tree)

    # A leftover debugging print + exit() used to terminate the whole
    # process here, which made the return below unreachable.
    return -1, code_object
コード例 #11
0
ファイル: Tree.py プロジェクト: preesee/CodeCloneDetection
def python2tree(line):
    """Parse ``line`` with asttokens.

    Returns:
        A pair ``(ASTTokens object, its tree)``.
    """
    tokenized = asttokens.ASTTokens(line, parse=True)
    parsed_tree = tokenized.tree
    return tokenized, parsed_tree
コード例 #12
0
def get_ranges(code):
    """Parse ``code`` and return the ``ranges`` gathered by a ``RangeFinder``.

    The code is parsed with ``asttokens`` first; a fresh ``RangeFinder`` then
    visits the resulting tree.
    """
    parsed_tree = asttokens.ASTTokens(code, parse=True).tree
    finder = RangeFinder()
    finder.visit(parsed_tree)
    return finder.ranges
コード例 #13
0
ファイル: project.py プロジェクト: dustindall/modelx
def _parse_source(path_, obj):
    """Read the source file at *path_* and translate it into instructions.

    Each top-level statement is mapped to zero or more ``_Instruction``
    objects targeting *obj*:

    * a leading bare string expression sets the doc via ``type(obj).doc``;
    * ``def`` statements become ``set_formula`` (for ``_formula``) or
      ``new_cells``;
    * assignments whose first token is ``_name``, ``_formula``, ``_refs``,
      ``_bases``, ``_method`` or ``_allow_none`` get dedicated handling
      (``_name`` is executed immediately rather than returned);
    * any other assignment becomes a ``new_cells`` instruction;
    * statements of any other kind produce no instruction.

    Args:
        path_: Path of the file to read; ``path_.with_name`` is used when a
            ``_method`` assignment is present.
        obj: Target object of the generated instructions.

    Returns:
        List of ``_Instruction`` objects in source order.
    """

    with open(path_, "r") as f:
        src = f.read()

    atok = asttokens.ASTTokens(src, parse=True)

    def docstring_of(stmt):
        """Return the text of a bare string expression statement, else None."""
        if not isinstance(stmt, ast.Expr):
            return None
        expr = stmt.value
        if isinstance(expr, ast.Constant) and isinstance(expr.value, str):
            return expr.value
        # Parse trees built before Python 3.8 use ast.Str; match it by name
        # because the class no longer exists on Python 3.14+.
        if type(expr).__name__ == "Str":
            return expr.s
        return None

    def parse_stmt(node):
        """Return (list of) instructions"""
        if isinstance(node, ast.FunctionDef):
            if node.name == "_formula":
                method = "set_formula"
            else:
                method = "new_cells"

            funcdef = atok.get_text(node)

            # The code below is just for adding back comment in the last line
            # such as:
            # def foo():
            #     return 0  # Comment
            nxtok = node.last_token.index + 1
            if nxtok < len(atok.tokens) and (
                    atok.tokens[nxtok].type == tokenize.COMMENT
            ) and node.last_token.line == atok.tokens[nxtok].line:
                deflines = funcdef.splitlines()
                deflines.pop()
                deflines.append(node.last_token.line.rstrip())
                funcdef = "\n".join(deflines)

            return [
                _Instruction(obj=obj,
                             method=method,
                             kwargs={"formula": funcdef})
            ]

        if isinstance(node, ast.Assign):

            if node.first_token.string == "_name":
                # Renaming is run right away instead of being returned.
                method = "rename"
                val = ast.literal_eval(atok.get_text(node.value))
                _Instruction(obj=obj,
                             method=method,
                             args=(val, ),
                             kwargs={
                                 "rename_old": True
                             }).run()
                return []

            elif node.first_token.string == "_formula":
                # lambda formula definition
                method = "set_formula"
                val = atok.get_text(node.value)
                if val == "None":
                    val = None
                kwargs = {"formula": val}
                return [_Instruction(obj=obj, method=method, kwargs=kwargs)]

            elif node.first_token.string == "_refs":

                def bound_decode_refs(data):
                    return _decode_refs(data, obj.fullname)

                refs = json.loads(atok.get_text(node.value),
                                  object_hook=bound_decode_refs)

                def refhook(args, kwargs):
                    if args:
                        key, val = args
                        val = _restore_ref(val)
                        args = (key, val)
                    return args, kwargs

                result = []
                for key, val in refs.items():
                    result.append(
                        _Instruction(obj=obj,
                                     method="__setattr__",
                                     args=(key, val),
                                     arghook=refhook))
                return result

            elif node.first_token.string == "_bases":

                bases = [
                    _RefData(rel_to_abs(base, obj.fullname))
                    for base in ast.literal_eval(atok.get_text(node.value))
                ]

                def basehook(args, kwargs):
                    if args:
                        args = _restore_ref(args)

                    return args, kwargs

                return [
                    _Instruction(obj=obj,
                                 method="add_bases",
                                 args=bases,
                                 arghook=basehook)
                ]

            elif node.first_token.string == "_method":

                def excelhook(args, kwargs):
                    # path_ is free variable
                    # Add path to file name
                    args[0] = str(path_.with_name(args[0]))
                    return args, kwargs

                _method = json.loads(atok.get_text(node.value))
                return [
                    _Instruction(obj=obj,
                                 method=_method["method"],
                                 args=_method["args"],
                                 kwargs=_method["kwargs"],
                                 arghook=excelhook)
                ]

            elif node.first_token.string == "_allow_none":
                args = json.loads(atok.get_text(node.value))
                return [
                    _Instruction(obj=obj,
                                 method="set_property",
                                 args=["allow_none", args])
                ]

            else:
                # lambda cells definition
                return [
                    _Instruction(obj=obj,
                                 method="new_cells",
                                 kwargs={
                                     "name": atok.get_text(node.targets[0]),
                                     "formula": atok.get_text(node.value)
                                 })
                ]

        # Any other statement kind yields no instruction.  Returning a list
        # (rather than falling through to None) keeps the caller's
        # ``result.extend`` from raising TypeError.
        return []

    result = []
    for i, stmt in enumerate(atok.tree.body):

        doc = docstring_of(stmt) if i == 0 else None
        if doc is not None:
            inst = _Instruction(obj=type(obj).doc,
                                method="fset",
                                args=(obj, doc))
            result.append(inst)
        else:
            result.extend(parse_stmt(stmt))

    return result
コード例 #14
0
def find_missing_trailing_commas(source_code, *, filename='<unknown>'):
    """Run a ``MissingTrailingCommaFinder`` over ``source_code``.

    Args:
        source_code: Text parsed with ``asttokens``.
        filename: Name forwarded to ``asttokens.ASTTokens``.

    Returns:
        The ``MissingTrailingCommaFinder`` after it has visited the tree.
    """
    tokenized = asttokens.ASTTokens(
        source_code, filename=filename, parse=True)
    finder = MissingTrailingCommaFinder(tokenized)
    finder.visit(tokenized.tree)
    return finder
コード例 #15
0
def parse(source):
    """Parse ``source`` with asttokens and return the ``ASTTokens`` object.

    ``source`` is asserted to be a ``str`` before parsing.
    """
    assert isinstance(source, str)
    return asttokens.ASTTokens(source, parse=True)