Exemple #1
0
    def __init__(self, atok: asttokens.ASTTokens, node: ast.Lambda) -> None:
        """
        Keep the parsed source and the lambda node, and cache the body's text.

        :param atok: parsed AST tree and tokens with added positional properties
        :param node: lambda AST node corresponding to the condition
        """
        self.atok, self.node = atok, node

        # Only the lambda's body (not the ``lambda ...:`` header) is kept as text.
        body_source = atok.get_text(node.body)
        assert isinstance(body_source, str)
        self.text = body_source
Exemple #2
0
def wrap_dict(asttokens: ASTTokens, node: ast.Dict) -> WrappingSummary:
    """
    Build the wrapping summary for a dict literal.

    Every entry gets a ``WRAP_INDENT`` mutation at its start; a trailing comma
    and the closing wrap are then appended via the shared helpers.
    """

    def entry_start(key, value):
        # A ``None`` key has no key node to anchor on, so the position of the
        # token immediately before the value (the original code calls it
        # ``kwargs_stars``) is used instead.
        if key is None:
            stars_token = asttokens.prev_token(_first_token(value))
            return Position(*stars_token.start)
        return Position.from_node_start(key)

    summary = [
        (entry_start(key, value), MutationType.WRAP_INDENT)
        for key, value in zip(node.keys, node.values)
    ]
    append_trailing_comma(asttokens, summary, node)
    append_wrap_end(summary, node)
    return summary
Exemple #3
0
def indent_interim_lines(
    asttokens: ASTTokens,
    wrapping_summary: WrappingSummary,
) -> WrappingSummary:
    """
    Add ``INDENT`` mutations for lines already spanned by the summary.

    The summary is extended in place (and re-sorted by position) with one
    ``INDENT`` entry per non-empty line after the first line of the summary,
    up to and including its last line. An empty or single-line summary is
    returned untouched.
    """
    if not wrapping_summary:
        # Nothing to change.
        return wrapping_summary

    first_line = wrapping_summary[0][0].line
    last_line = wrapping_summary[-1][0].line
    if first_line == last_line:
        # The construct started out on a single line; no interim lines exist.
        return wrapping_summary

    def first_token_on(line_no):
        # Asking for column 0 may hand back the final token of the previous
        # line; in that case step forward once to reach this line's first one.
        candidate = asttokens.get_token(line_no, 0)
        if candidate.start[0] == line_no:
            return candidate
        candidate = asttokens.next_token(candidate, include_extra=True)
        assert candidate.start[0] == line_no, "Token unexpectedly on wrong line"
        return candidate

    # The first line is the one being split, so it is skipped; everything that
    # was already wrapped below it (including the last line) gets indented.
    for line_no in range(first_line + 1, last_line + 1):
        leading = first_token_on(line_no)
        if leading.string != '\n':  # blank lines need no indentation
            wrapping_summary.append(
                (Position(*leading.start), MutationType.INDENT),
            )

    wrapping_summary.sort(key=lambda entry: entry[0])
    return wrapping_summary
Exemple #4
0
def wrap_generator_body(
    asttokens: ASTTokens,
    elt: ast.expr,
    generators: List[ast.comprehension],
) -> WrappingSummary:
    """
    Build ``WRAP_INDENT`` mutations for the parts of a comprehension body.

    Positions are emitted, in order, for the element expression, for each
    comprehension clause, and for the ``if`` keyword of each of its filters.
    """

    def iter_starts():
        yield Position.from_node_start(elt)
        for comprehension in generators:
            yield Position.from_node_start(comprehension)
            for condition in comprehension.ifs:
                # The filter node starts after its keyword, so step back one
                # token to wrap before the ``if`` itself.
                if_keyword = asttokens.prev_token(_first_token(condition))
                assert if_keyword.string == 'if'
                yield Position(*if_keyword.start)

    return [(start, MutationType.WRAP_INDENT) for start in iter_starts()]
Exemple #5
0
def process(
    positions: List[Position],
    content: str,
    filename: str,
) -> List[Insertion]:
    """
    Parse ``content`` and compute the merged insertions for ``positions``.

    :raises TargetSyntaxError: when ``content`` cannot be parsed; the original
        ``SyntaxError`` is chained as the cause.
    """
    try:
        parsed = ASTTokens(content, parse=True, filename=filename)
    except SyntaxError as error:
        # The syntax error is attributed to the target file; asttokens itself
        # is trusted not to be the source of it.
        raise TargetSyntaxError(error) from error

    per_position = (determine_insertions(parsed, pos) for pos in positions)
    return merge_insertions(per_position)
Exemple #6
0
def wrap_bool_op(asttokens: ASTTokens, node: ast.BoolOp) -> WrappingSummary:
    """
    Build the wrapping summary for a boolean operation (``a and b or c``).

    Each operand is wrapped at its start and a ``WRAP`` is placed after the
    last token. When the expression is not already preceded by ``(``, opening
    and closing parenthesis mutations are added around it.
    """
    summary = wrap_node_start_positions(asttokens, node.values)
    summary.append((Position(*_last_token(node).end), MutationType.WRAP))

    preceding = asttokens.prev_token(_first_token(node))
    if preceding.string == '(':
        # Already parenthesised: nothing more to add.
        return summary

    opening = (Position.from_node_start(node), MutationType.OPEN_PAREN)
    summary[:0] = [opening]
    closing = (Position(*_last_token(node).end), MutationType.CLOSE_PAREN)
    summary.append(closing)
    return summary
def try_to_extract(file):
    """
    Parse the Python source in ``file`` and return its AST, or ``None``.

    The file is read once as text. The text is then handed to ``ASTTokens`` as
    bytes, first encoded as UTF-8 and, if that attempt fails, as cp1252.

    :param file: path of the file to parse
    :return: ``atok.tree`` of the first successful attempt; ``None`` when
        ``file`` is not a regular file or when every attempt fails (a message
        is printed in the latter case)
    """
    if not os.path.isfile(file):
        return None

    # Failures that are treated as "this attempt did not work".
    # UnicodeDecodeError / UnicodeEncodeError are covered by ValueError.
    recoverable = (TokenError, SyntaxError, ValueError, IndexError, TypeError)

    try:
        with open(file) as fin:
            # Read exactly once: a second read() on the same handle would
            # return an empty string and make the fallback parse nothing.
            source = fin.read()
    except ValueError:
        print("Unable to extract from {}".format(file))
        return None

    for encoding in ("utf-8", "cp1252"):
        try:
            atok = ASTTokens(source.encode(encoding),
                             parse=True,
                             filename=file)
        except recoverable:
            continue
        return atok.tree

    print("Unable to extract from {}".format(file))
    return None
Exemple #8
0
def append_trailing_comma(
    asttokens: ASTTokens,
    summary: WrappingSummary,
    node: ast.AST,
) -> WrappingSummary:
    """
    Append a ``TRAILING_COMMA`` mutation for ``node`` to ``summary``.

    The summary is modified in place and also returned.
    """
    # The comma goes at the *end of the last content token*, not at the start
    # of the closing token. For input such as
    #
    #   func(
    #       'abcd', 'defg'
    #   )
    #
    # that puts the comma directly after 'defg' instead of on the line of the
    # closing paren.
    closing_token = _last_token(node)
    final_content_token = asttokens.prev_token(closing_token)
    summary.append(
        (Position(*final_content_token.end), MutationType.TRAILING_COMMA),
    )
    return summary
Exemple #9
0
def wrap_call(asttokens: ASTTokens, node: ast.Call) -> WrappingSummary:
    """
    Build the wrapping summary for a call expression.

    A call whose only argument is an unparenthesised generator expression has
    that generator wrapped and parenthesised; otherwise every positional and
    named argument is wrapped at its start. A trailing keyword with no name
    (``arg is None``) is wrapped at the token preceding it. A trailing comma
    and the closing wrap are appended last.
    """
    keywords = node.keywords
    if keywords and keywords[-1].arg is None:
        # The final keyword has no name: split it off from the named ones.
        named_args, kwargs = keywords[:-1], keywords[-1]
    else:
        named_args, kwargs = keywords, None

    bare_generator = None
    if len(node.args) == 1 and not named_args:
        only_arg = node.args[0]
        if (isinstance(only_arg, ast.GeneratorExp)
                and not generator_is_parenthesised(asttokens, only_arg)):
            bare_generator = only_arg

    if bare_generator is not None:
        # The generator needs its own parentheses in addition to wrapping.
        summary = [
            (Position.from_node_start(bare_generator),
             MutationType.WRAP_INDENT),
            (Position.from_node_start(bare_generator),
             MutationType.OPEN_PAREN),
            (Position(*_last_token(bare_generator).end),
             MutationType.CLOSE_PAREN),
        ]
    else:
        summary = wrap_node_start_positions(
            asttokens,
            [*node.args, *named_args],
        )

    if kwargs is not None:
        stars_token = asttokens.prev_token(_first_token(kwargs))
        summary.append(
            (Position(*stars_token.start), MutationType.WRAP_INDENT))

    append_trailing_comma(asttokens, summary, node)
    append_wrap_end(summary, node)
    return summary
Exemple #10
0
def get_tokenised(filepath):
    """Read ``filepath`` as text and return an ``ASTTokens`` parsed from it."""
    with open(filepath, "r") as handle:
        return ASTTokens(handle.read(), parse=True)
Exemple #11
0
def _negate_compare_text(atok: asttokens.ASTTokens, node: ast.Compare) -> str:
    """
    Generate the text representing the negation of the comparison node.

    :param atok:
        parsing obtained with ``asttokens``; it is used to fetch the source
        text of the two operands.
    :param node:
        AST node representing the comparison in a condition; it must have
        exactly one operator and one comparator
    :return: text representation of the node's negation
    :raises NotImplementedError: if the comparison operator is not handled
    """
    assert len(
        node.ops) == 1, "A single comparison expected, but got: {}".format(
            len(node.ops))
    assert len(node.comparators
               ) == 1, "A single comparator expected, but got: {}".format(
                   len(node.comparators))

    operator = node.ops[0]
    left_text = atok.get_text(node=node.left)
    right_text = atok.get_text(node=node.comparators[0])

    # Each operator class paired with the template of its logical opposite.
    negation_templates = (
        (ast.Eq, '{} != {}'),
        (ast.NotEq, '{} == {}'),
        (ast.Lt, '{} >= {}'),
        (ast.LtE, '{} > {}'),
        (ast.Gt, '{} <= {}'),
        (ast.GtE, '{} < {}'),
        (ast.Is, '{} is not {}'),
        (ast.IsNot, '{} is {}'),
        (ast.In, '{} not in {}'),
        (ast.NotIn, '{} in {}'),
    )
    for operator_type, template in negation_templates:
        if isinstance(operator, operator_type):
            return template.format(left_text, right_text)

    raise NotImplementedError(
        "Unhandled comparison operator: {}".format(operator))
Exemple #12
0
 def create_asttokens(source):
     """Return an ``ASTTokens`` for ``source``, letting it parse the text itself (``parse=True``)."""
     return ASTTokens(source, parse=True)
Exemple #13
0
def multilinify_import_stmt_str(import_stmt_str,
                                indent_spaces=4,
                                trailing_comma=True):
    """
    Takes a single line import statement and turns it into a multiline string.
    Will raise a `ValueError` if given a multiline string (a newline at the end
    of the string is permitted).

    This function is written in expectation of the output of `get_import_stmt_str`,
    and is not intended to process all potential ways of writing an import statement.

        import_stmt_str:  String of Python code carrying out an import statement.
        indent_spaces:    Number of spaces to indent by in multiline format.
        trailing_comma:   Whether to add a trailing comma to the final alias in a
                          multiline list of import aliases (default: True)

    Returns the statement rewritten as ``<preamble> (``, one indented alias per
    line, and a closing ``)``, each line terminated by the module's newline
    string.

    Raises `AssertionError` if the string is not an import statement or if an
    expected token cannot be located.
    """
    # Token type numbers differ between Python versions (the type of "," is
    # `token.OP`, whose numeric value is not stable), so take them from the
    # running interpreter instead of hard-coding them.
    import token

    import_stmt_str = import_stmt_str.rstrip(nl)
    if nl in import_stmt_str:
        raise ValueError(f"{import_stmt_str} is not a single line string")
    imp_ast = ast.parse(import_stmt_str)
    assert type(imp_ast.body[0]) in [IType,
                                     IFType], "Not a valid import statement"
    tko = ASTTokens(import_stmt_str)
    import_tok = tko.find_token(tko.tokens[0],
                                tok_type=token.NAME,
                                tok_str="import")
    # `find_token` signals "not found" with a token whose type is 0.
    assert import_tok.type > 0, "Unable to find import token in the given string"
    aliases = [(x.name, x.asname) for x in imp_ast.body[0].names]

    # Everything up to and including the `import` keyword, then "(".
    chunks = [import_stmt_str[:import_tok.endpos], " (" + nl]
    # Each alias is searched for from the comma that ended the previous one
    # (or from the `import` keyword for the first alias).
    search_from = import_tok
    for al_i, (a_n, a_as) in enumerate(aliases):
        is_final_alias = al_i + 1 == len(aliases)
        # Get start of alias by either full name or first part of .-separated name
        al_n_tok = tko.find_token(search_from,
                                  token.NAME,
                                  tok_str=a_n.split(".")[0])
        assert al_n_tok.type > 0, f"Unable to find the token for {a_n}"
        # The last token belonging to this alias: its asname if it has one,
        # otherwise the name token itself.
        last_tok = al_n_tok
        if a_as is not None:
            last_tok = tko.find_token(al_n_tok,
                                      tok_type=token.NAME,
                                      tok_str=a_as)
            assert last_tok.type > 0, f"Unable to find the token for {a_as}"
        if is_final_alias:
            # There won't be a comma after this (it is the last alias token)
            al_endpos = last_tok.endpos
        else:
            comma_tok = tko.find_token(last_tok,
                                       tok_type=token.OP,
                                       tok_str=",")
            assert comma_tok.type > 0, "Unable to find comma token"
            al_endpos = comma_tok.endpos
            search_from = comma_tok
        alias_chunk = import_stmt_str[al_n_tok.startpos:al_endpos]
        if is_final_alias and trailing_comma:
            alias_chunk += ","
        chunks.append((" " * indent_spaces) + alias_chunk + nl)
    # Finally, verify that the end of the tokenised string was reached
    # NOTE(review): a final alias with a dotted name and no asname ends at the
    # first name part, so it presumably trips this assertion; confirm whether
    # such input can reach this function.
    assert al_endpos == tko.tokens[
        -1].endpos, "Did not tokenise to the end of string"
    chunks.append(")" + nl)
    return "".join(chunks)
Exemple #14
0
def get_all_objects(line, frame):
    """Collect the objects referenced on a (partial) line of code.

    The names are resolved against the given frame so that they can be
    formatted as part of the answer to "where()" or used while analysing the
    cause of an exception.

    The returned dict has five keys:

    * 'locals', 'globals', 'builtins': lists of tuples of the form
      (name, repr(obj), obj) where name --> obj.
    * 'expressions': a list of (text, obj) tuples for evaluated
      sub-expressions whose text is not a literal.
    * 'name, obj': a list of (name, obj) tuples for everything recorded
      above, plus evaluated sub-expressions that are literals.
    """
    objects = {
        "locals": [],
        "globals": [],
        "builtins": [],
        "expressions": [],
        "name, obj": [],
    }

    scopes = (
        ("locals", frame.f_locals),  # always have locals before globals
        ("globals", frame.f_globals),
    )

    seen = set()

    def remember(category, identifier, value):
        # Record an identifier under its category and in the flat listing.
        seen.add(identifier)
        objects[category].append((identifier, repr(value), value))
        objects["name, obj"].append((identifier, value))

    # First pass: plain identifiers, looked up in locals, globals, builtins.
    for tok in token_utils.get_significant_tokens(line):
        if not tok.is_identifier():
            continue
        identifier = tok.string
        if identifier in seen:
            continue
        for scope, scope_dict in scopes:
            if identifier in scope_dict:
                remember(scope, identifier, scope_dict[identifier])
                break
        else:
            if identifier in dir(builtins):
                remember("builtins", identifier, getattr(builtins, identifier))

    # Second pass: whole expressions that can be evaluated in the frame.
    try:
        atok = ASTTokens(line, parse=True)
    except SyntaxError:  # this should not happen
        return objects

    if atok is None:
        return objects

    evaluator = Evaluator.from_frame(frame)
    expression_pairs = (pair for pair in evaluator.find_expressions(atok.tree))
    for nodes, obj in group_expressions(expression_pairs):
        text = atok.get_text(nodes[0])
        if text in seen:
            continue
        seen.add(text)
        objects["name, obj"].append((text, obj))
        # Literals are not interesting in the list of variables, so only text
        # that fails to evaluate as a literal is kept as an expression.
        try:
            ast.literal_eval(text)
        except Exception:  # noqa
            is_literal = False
        else:
            is_literal = True
        if not is_literal:
            objects["expressions"].append((text, obj))

    return objects
def get_all_objects(line, frame):
    """Given a (partial) line of code and a frame,
    obtains a dict containing all the relevant information about objects
    found on that line so that they can be formatted as part of the
    answer to "where()" or they can be used during the analysis
    of the cause of the exception.

    The dict returned has six keys.

    'locals', 'globals' and 'builtins' each contain a list of tuples of the
    form (name, repr(obj), obj) where name --> obj.

    'literals' contains a list of tuples of the form (name, obj).

    'name, obj' contains a list of (name, obj) tuples for every entry
    recorded under the four keys above.

    'nonlocals' holds whatever ``get_nonlocal_objects(frame)`` returns.
    """
    objects = {
        "locals": [],
        "globals": [],
        "literals": [],
        "builtins": [],
        "name, obj": [],
    }

    scopes = (
        ("locals", frame.f_locals),  # always have locals before globals
        ("globals", frame.f_globals),
    )

    names = set()
    try:
        atok = ASTTokens(line, parse=True)
    except SyntaxError:  # this should not happen
        atok = None

    if atok is not None:
        # Expressions that can be evaluated in each scope, locals first.
        for scope, scope_dict in scopes:
            for nodes, obj in Evaluator(
                    scope_dict).interesting_expressions_grouped(atok.tree):
                name = atok.get_text(nodes[0])
                if name in names:
                    continue
                names.add(name)
                objects[scope].append((name, repr(obj), obj))
                objects["name, obj"].append((name, obj))

        # The module-level function is attached to the Evaluator class as a
        # method (re-assigned on every call) before being used.
        Evaluator.literal_expressions_grouped = literal_expressions_grouped
        for nodes, obj in Evaluator({}).literal_expressions_grouped(
                atok.tree):  # noqa
            name = atok.get_text(nodes[0])
            objects["literals"].append((name, obj))
            objects["name, obj"].append((name, obj))

    # Plain identifiers not already recorded by the expression pass.
    tokens = token_utils.get_significant_tokens(line)
    for tok in tokens:
        if tok.is_identifier():
            name = tok.string
            if name in names:
                continue
            for scope, scope_dict in scopes:
                if name in scope_dict:
                    names.add(name)
                    obj = scope_dict[name]
                    objects[scope].append((name, repr(obj), obj))
                    objects["name, obj"].append((name, obj))
                    break
            else:
                if name in dir(builtins):
                    # Remember the builtin too; otherwise a builtin used
                    # several times on the line is listed once per use.
                    names.add(name)
                    obj = getattr(builtins, name)
                    objects["builtins"].append((name, repr(obj), obj))
                    objects["name, obj"].append((name, obj))

    # Dotted names that exist verbatim as keys of a scope dict.
    dotted_names = get_dotted_names(line)
    for name in dotted_names:
        for scope, scope_dict in scopes:
            if name not in scope_dict:
                continue
            obj = scope_dict[name]
            if (name, obj) not in objects["name, obj"]:
                objects[scope].append((name, repr(obj), obj))
                objects["name, obj"].append((name, obj))

    # TODO: check to see if this is still needed
    objects["nonlocals"] = get_nonlocal_objects(frame)
    return objects
Exemple #16
0
 def compile(self, source, filename, flags=0):
     """Compile via the parent class, then attach tokens to the result.

     The traced file returned by the parent's ``compile`` gets a ``tokens``
     attribute: an ``ASTTokens`` built from ``source`` and the traced file's
     ``root`` tree. The traced file is then returned.
     """
     traced_file = super(BirdsEye, self).compile(source, filename, flags)
     traced_file.tokens = ASTTokens(source, tree=traced_file.root)
     return traced_file
Exemple #17
0
 def create_asttokens(source):
     """Return an ``ASTTokens`` for ``source`` using a tree built by astroid.

     The tree comes from ``AstroidBuilder.string_build`` and is passed to
     ``ASTTokens`` via ``tree=`` instead of letting it parse the source itself.
     """
     builder = astroid.builder.AstroidBuilder()
     tree = builder.string_build(source)
     return ASTTokens(source, tree=tree)
Exemple #18
0
def colour_imp_stmt(imp_stmt, lines):
    """
    Summary: get a string which when printed will show the separate parts of an
    import statement in different colours (preamble in blue, alias names in red,
    alias asnames in purple, the word "as" itself in yellow, commas between import
    aliases in light green, and post-matter (a bracket) in light red).

    `imp_stmt` is an import statement node from an asttokens-annotated AST, so
    it carries `first_token` and `last_token` attributes. `lines` is the list
    of source lines (each a string) the AST was built from. The `(line, column)`
    tuples on the tokens use a 1-based line number, so they are decremented to
    index into `lines`; the lines spanned by the statement are joined into a
    single string, `nodestring`.

    A new ASTTokens object, `tko`, is made from `nodestring`, and its
    `find_token` method is used to locate the "NAME" token of each alias name
    and asname (type `1` is the index of "NAME" in `token.tok_name`). The
    names and asnames come from `imp_stmt.names`, a list of `ast.alias`
    instances whose `asname` is `None` when no asname was given.

    The search starts at the statement's `import` keyword and continues from
    the previously matched token, so a module name or an earlier alias that
    happens to share a string with a later name/asname is not matched by
    mistake.

    Only the `startpos` offsets of the matched tokens are stored; the tokens
    are re-accessed with `tko.get_token_from_offset(startpos)` to obtain their
    `endpos`. The coloured string is then assembled by slicing `nodestring`
    between these offsets and passing each piece to the `colour` helper.

    The end result, `modified_nodestring`, is returned.

    Raises `AssertionError` if `imp_stmt` is not an annotated import statement
    or if a name/asname token cannot be located in `nodestring`.
    """
    assert hasattr(imp_stmt,
                   "first_token"), "Not an asttokens-annotated AST node"
    assert type(imp_stmt) in [IType, IFType], "Not an import statement"
    name_type = 1  # 1 is the key for "NAME" in Python's token.tok_name
    imp_startln = imp_stmt.first_token.start[0] - 1  # Use 0-based line index
    imp_endln = imp_stmt.last_token.end[0] - 1  # to match list of lines
    nodestring = "".join(lines[imp_startln:(imp_endln + 1)])
    tko = ASTTokens(nodestring)
    # `nodestring` begins at the start of the statement's first line, so the
    # statement's column is also its offset within `nodestring`.
    stmt_tok = tko.get_token_from_offset(imp_stmt.first_token.start[1])
    cursor = tko.find_token(stmt_tok, name_type, tok_str="import")
    # `find_token` signals "not found" with a token whose type is 0.
    assert cursor.type > 0, "Unable to find import token in the given string"
    alias_starts = []
    for alias in imp_stmt.names:
        starts = []
        for part in (alias.name, alias.asname):
            if part is None:
                continue
            # Search strictly after the previous match so that identical
            # strings (e.g. `import a as a`) resolve to distinct tokens.
            part_tok = tko.find_token(tko.next_token(cursor),
                                      name_type,
                                      tok_str=part)
            assert part_tok.type > 0, (
                f"No import name at {part_tok.startpos} in {nodestring}")
            starts.append(part_tok.startpos)
            cursor = part_tok
        alias_starts.append(starts)
    assert len(alias_starts) > 0, "An import statement cannot import no names!"
    assert alias_starts[0][
        0] > 0, "An import statement cannot begin with a name!"
    modified_nodestring = ""
    # -------------------------------------------------------------------------
    # Now set up colour definitions for the modified import statement string
    name_colour, asname_colour = ["red", "purple"]
    pre_colour, post_colour = ["light_blue", "light_red"]
    as_string_colour = "yellow"
    comma_colour = "light_green"
    # -------------------------------------------------------------------------
    first_import_name_startpos = alias_starts[0][0]
    pre_str = nodestring[:first_import_name_startpos]
    modified_nodestring += colour(pre_colour, pre_str)
    seen_endpos = first_import_name_startpos
    # (Could add a try/except here to verify colours are in colour dict if modifiable)
    for al_i, alias_start_list in enumerate(alias_starts):
        for al_j, al_start in enumerate(alias_start_list):
            if seen_endpos < al_start:
                # There is an intervening string, append it to modified_nodestring
                intervening_str = nodestring[seen_endpos:al_start]
                if al_j > 0:
                    # This is the word "as", which comes between a name and an asname
                    modified_nodestring += colour(as_string_colour,
                                                  intervening_str)
                else:
                    if al_i > 0:
                        assert "," in intervening_str, "Import aliases not comma-sep.?"
                        modified_nodestring += colour(comma_colour,
                                                      intervening_str)
                    else:
                        modified_nodestring += intervening_str
            # al_j is 0 for a name and 1 for an asname
            name_tok = tko.get_token_from_offset(al_start)
            assert name_tok.type > 0, f"No import name at {al_start} in {nodestring}"
            al_endpos = name_tok.endpos
            imp_name = nodestring[al_start:al_endpos]
            cstr_colour = [name_colour, asname_colour][al_j]
            modified_nodestring += colour(cstr_colour, imp_name)
            seen_endpos = al_endpos
    end_str = nodestring[seen_endpos:]
    modified_nodestring += colour(post_colour, end_str)
    return modified_nodestring