def _parse_lambda(lam):
  """Returns the AST and source code of the given lambda function.

  Args:
    lam: types.LambdaType, Python function/method/class

  Returns:
    gast.AST, Text: the parsed AST node; the source code that was parsed to
      generate the AST (including any prefixes that this function may have
      added).

  Raises:
    errors.UnsupportedLanguageElementError: if no lambda node matching `lam`
      could be found in the source, or if multiple candidates with identical
      signatures could not be disambiguated.
  """
  # TODO(mdan): Use a fast path if the definition is not multi-line.
  # We could detect that the lambda is in a multi-line expression by looking
  # at the surrounding code - a surrounding set of parentheses indicates a
  # potential multi-line definition.

  mod = inspect.getmodule(lam)
  f = inspect.getsourcefile(lam)
  # First line of the lambda's code object; used to locate it in the source.
  def_line = lam.__code__.co_firstlineno

  # This method is more robust than just calling inspect.getsource(mod), as it
  # works in interactive shells, where getsource would fail. This is the
  # same procedure followed by inspect for non-modules:
  # https://github.com/python/cpython/blob/3.8/Lib/inspect.py#L772
  lines = linecache.getlines(f, mod.__dict__)
  source = ''.join(lines)

  # Narrow down to the last node starting before our definition node.
  all_nodes = parse(source, preamble_len=0, single_node=False)
  search_nodes = []
  for node in all_nodes:
    # Also include nodes without a line number, for safety. This is defensive -
    # we don't know whether such nodes might exist, and if they do, whether
    # they are not safe to skip.
    # TODO(mdan): Replace this check with an assertion or skip such nodes.
    if getattr(node, 'lineno', def_line) <= def_line:
      search_nodes.append(node)
    else:
      # Found a node starting past our lambda - can stop the search.
      break

  # Extract all lambda nodes from the shortlist.
  lambda_nodes = []
  for node in search_nodes:
    lambda_nodes.extend(
        n for n in gast.walk(node) if isinstance(n, gast.Lambda))

  # Filter down to lambda nodes which span our actual lambda, i.e. whose
  # [minl, maxl] line range contains def_line.
  candidates = []
  for ln in lambda_nodes:
    minl, maxl = MAX_SIZE, 0
    for n in gast.walk(ln):
      minl = min(minl, getattr(n, 'lineno', minl))
      lineno = getattr(n, 'lineno', maxl)
      end_lineno = getattr(n, 'end_lineno', None)
      if end_lineno is not None:
        # end_lineno is more precise, but lineno should almost always work too.
        lineno = end_lineno
      maxl = max(maxl, lineno)
    if minl <= def_line <= maxl:
      candidates.append((ln, minl, maxl))

  # Happy path: exactly one node found.
  if len(candidates) == 1:
    (node, minl, maxl), = candidates  # pylint:disable=unbalanced-tuple-unpacking
    return _without_context(node, lines, minl, maxl)

  elif not candidates:
    lambda_codes = '\n'.join([unparse(l) for l in lambda_nodes])
    raise errors.UnsupportedLanguageElementError(
        f'could not parse the source code of {lam}:'
        f' no matching AST found among candidates:\n{lambda_codes}')

  # Attempt to narrow down selection by signature if multiple nodes are found.
  matches = [v for v in candidates if _node_matches_argspec(v[0], lam)]
  if len(matches) == 1:
    (node, minl, maxl), = matches
    return _without_context(node, lines, minl, maxl)

  # Give up if could not narrow down to a single node.
  matches = '\n'.join(
      'Match {}:\n{}\n'.format(i, unparse(node, include_encoding_marker=False))
      for i, (node, _, _) in enumerate(matches))
  raise errors.UnsupportedLanguageElementError(
      f'could not parse the source code of {lam}: found multiple definitions'
      ' with identical signatures at the location. This error'
      ' may be avoided by defining each lambda on a single line and with'
      f' unique argument names. The matching definitions were:\n{matches}')
def dedent_block(code_string):
  """Dedents a code block so that its first line starts at row zero.

  Args:
    code_string: Text, the source code of a (possibly indented) block.

  Returns:
    Text: the dedented code. Returned unchanged when the block is not
      indented to begin with.

  Raises:
    errors.UnsupportedLanguageElementError: if the block mixes tabs and
      spaces for indentation.
  """
  code_string = _unfold_continuations(code_string)

  token_gen = tokenize.generate_tokens(six.StringIO(code_string).readline)
  block_indentation = None
  tokens = []
  try:
    for tok in token_gen:
      tokens.append(tok)
  except tokenize.TokenError:
    # Resolution of lambda functions may yield incomplete code, which can
    # in turn generate this error. We silently ignore this error because the
    # parser may still be able to deal with it.
    pass

  # Find the indentation of the first real code token. Prefix-only token
  # types (newlines, strings, comments) are skipped, since they can appear
  # before the first INDENT without determining the block's indentation.
  for tok in tokens:
    tok_type, tok_string, _, _, _ = tok
    if tok_type == tokenize.INDENT:
      block_indentation = tok_string
      break
    elif tok_type not in (
        tokenize.NL, tokenize.NEWLINE, tokenize.STRING, tokenize.COMMENT):
      # A code token before any INDENT means the block is not indented.
      block_indentation = ''
      break

  if not block_indentation:
    return code_string
  # Note: the redundant block_level assignment that previously shadowed this
  # one inside the loop above was a dead store and has been removed.
  block_level = len(block_indentation)
  first_indent_uses_tabs = '\t' in block_indentation
  for i, tok in enumerate(tokens):
    tok_type, tok_string, _, _, _ = tok
    if tok_type == tokenize.INDENT:
      if ((' ' in tok_string and first_indent_uses_tabs)
          or ('\t' in tok_string and not first_indent_uses_tabs)):
        # TODO(mdan): We could attempt to convert tabs to spaces by unix rule.
        # See:
        # https://docs.python.org/3/reference/lexical_analysis.html#indentation
        raise errors.UnsupportedLanguageElementError(
            'code mixing tabs and spaces for indentation is not allowed')
      if len(tok_string) >= block_level:
        # Strip the block-level indentation from each INDENT token.
        tok_string = tok_string[block_level:]
      tokens[i] = (tok_type, tok_string)

  new_code = tokenize.untokenize(tokens)

  # Note: untokenize respects the line structure, but not the whitespace within
  # lines. For example, `def foo()` may be untokenized as `def foo ()`
  # So instead of using the output of dedent, we match the leading whitespace
  # on each line.
  dedented_code = []
  for line, new_line in zip(code_string.split('\n'), new_code.split('\n')):
    original_indent = re.match(_LEADING_WHITESPACE, line).group()
    new_indent = re.match(_LEADING_WHITESPACE, new_line).group()
    if len(original_indent) > len(new_indent):
      # Keep the original line content, minus the dedented amount.
      dedented_line = line[len(original_indent) - len(new_indent):]
    else:
      dedented_line = line
    dedented_code.append(dedented_line)
  new_code = '\n'.join(dedented_code)

  return new_code
def visit_YieldFrom(self, node):
  """Flags `yield from` expressions, which AutoGraph cannot convert."""
  del node  # Unused: the mere presence of a YieldFrom node is the error.
  raise errors.UnsupportedLanguageElementError(
      'generators are not supported by AutoGraph')
def visit_Attribute(self, node):
  """Checks attribute accesses for privately-mangled names.

  Attributes of the form `__name` (leading double underscore, no trailing
  double underscore) are subject to Python's private name mangling, which
  is not supported yet; such accesses are rejected.

  Args:
    node: gast.Attribute node being visited.

  Raises:
    errors.UnsupportedLanguageElementError: if the attribute name would be
      subject to private name mangling.
  """
  if (node.attr is not None
      and node.attr.startswith('__') and not node.attr.endswith('__')):
    raise errors.UnsupportedLanguageElementError(
        'mangled names are not yet supported by AutoGraph')
  # Fix: recurse into children. Defining visit_Attribute suppresses the
  # default traversal, so without this call the value expression of the
  # attribute (e.g. the inner access in `a.__b.c`) was never checked.
  self.generic_visit(node)
def fail():
  """Raises a parse error that quotes the unparseable source."""
  # original_source is a closure variable from the enclosing scope.
  message = (
      'could not parse the source code:'
      f'\n\n{original_source}\n'
      'This error may be avoided by creating the lambda in a standalone'
      ' statement.\n')
  raise errors.UnsupportedLanguageElementError(message)
def visit_Yield(self, node):
  """Flags `yield` expressions; generator functions are not convertible."""
  del node  # Unused: the mere presence of a Yield node is the error.
  raise errors.UnsupportedLanguageElementError('generators are not supported')
def visit_While(self, node):
  """Validates a `while` loop, then traverses into its children."""
  else_clause = node.orelse
  if else_clause:
    # There is no AutoGraph equivalent for a while loop's `else` block yet.
    raise errors.UnsupportedLanguageElementError(
        'while/else statement not yet supported')
  self.generic_visit(node)