def visit_other(self, node: Node) -> VisitResult:
    """
    Visit the populated child fields of a node, replacing them in place.

    The field names are taken from the node's "_fields" entry (no fields are
    processed when the entry is missing). Every truthy child is replaced by the
    value returned by the handler; falsy children (None, empty containers...)
    are left untouched. The same, mutated, node is returned.
    """
    field_names = node.get("_fields", [])
    for name in field_names:
        # The handler is chosen from the type of *this* node, falling back to
        # visit_other_field when the visitor has no specific visit_<ast_type>
        handler = getattr(self, "visit_" + node["ast_type"],
                          self.visit_other_field)
        current = node.get(name)
        if not current:
            continue
        node[name] = handler(current)
    return node
def visit_Attribute(self, node: Node) -> Node:
    """
    Flatten a chain of nested "value" nodes into a single QualifiedIdentifier.

    A node without a truthy "value" is returned untouched. Otherwise every
    nested "value" is detached from its own child, visited, and stored (innermost
    first) in node["identifiers"], followed by a cleaned copy of the node itself.
    The node loses its "value" and "attr" keys, gets the "QualifiedIdentifier"
    type and takes the truthy position keys of the first identifier.
    """
    link = deepcopy(node.get("value"))
    if not link:
        return node

    # Walk the chain from the outermost link to the innermost one; each link is
    # visited without its own "value" child (unless that child is falsy).
    outer_first: List[Node] = []
    while link:
        inner = deepcopy(link.get("value"))
        if inner:
            link.pop("value", None)
        outer_first.append(self.visit(link))
        link = inner
    identifiers = outer_first[::-1]

    # Append a copy of this node at the end, and change the type of the original
    node.pop("value", None)
    tail = deepcopy(node)
    self._remove_internal(tail)
    self._normalize_position(tail)
    identifiers.append(tail)
    node["ast_type"] = "QualifiedIdentifier"

    # Copy the position of the first element (the list always holds at least
    # the visited first link plus the copy appended above)
    head = identifiers[0]
    for pos_key in ("lineno", "end_lineno", "col_offset", "end_col_offset"):
        if head.get(pos_key):
            node[pos_key] = head[pos_key]

    node.pop("attr", None)
    node["identifiers"] = identifiers
    return node
def name2arg(node: Node):
    """
    Normalize Python2 and 3 argument types (the node is modified in place).

    A node of type "Name" becomes an "arg", and a truthy "id" entry is moved
    to the "@token" key.
    """
    is_name = node["ast_type"] == "Name"
    if is_name:
        node["ast_type"] = "arg"

    if node.get("id"):
        # Move (not copy) the identifier: "id" must not survive in the node
        node["@token"] = node.pop("id")
def sync_node_pos(self, nodedict: Node) -> None:
    """
    Check the node position, replacing it with the one of its token when a
    matching token is found (this changes the nodedict argument).

    Some nodes have questionable column positions in the AST given by Python
    (e.g. all items in sys.stdout.write have column 1), so the start/end row and
    column of the token popped for the node's line are used instead.

    The token searched is the value of one of the node's TOKEN_KEYS properties
    or, when the node has none of them, the SYNTHETIC_TOKENS entry for its
    "ast_type". The node is left untouched when it has no "lineno", when no
    token name can be determined, when _pop_token raises
    TokenNotFoundException or when the node is an "ImportFrom" (its token is
    still popped in that last case).
    """
    line = nodedict.get('lineno')
    if line is None:
        return

    # We take these node properties as token name if they exist
    # (same used in the Bblfsh Python driver parser.go):
    present_keys = list(set(nodedict.keys()).intersection(TOKEN_KEYS))
    if present_keys:
        token_name = nodedict[present_keys[0]]
    else:
        token_name = SYNTHETIC_TOKENS.get(nodedict["ast_type"])
        if not token_name:
            # token not found
            return

    try:
        # Pop the first token with the same name in the same line.
        found = self._pop_token(line, token_name)
    except TokenNotFoundException:
        # Only happens with multiline string and the original
        # position in that case is fine (uses the last line in that case)
        return

    if nodedict['ast_type'] == 'ImportFrom':
        # ImportFrom takes the module as token, we don't want that position,
        # default is fine
        return

    nodedict["lineno"] = found.start.row
    nodedict["col_offset"] = found.start.col
    nodedict["end_lineno"] = found.end.row
    nodedict["end_col_offset"] = found.end.col
def sameline_remainder_noops(self, nodedict: Node) -> List[Token]:
    """
    Return the noop Tokens that close the line of the node (until EOL), if any.

    Only the run of tokens named in NOOP_TOKENS_LINE found after the last other
    token of the line is returned, without its final token when that one is a
    newline. The line is remembered in self._sameline_added_noops so that only
    the first node asking for a line gets its trailing noops.
    """
    line = nodedict.get("lineno")
    # An empty list is returned, in this order of checks, when:
    # - the node has no line number, so we can't know its line;
    # - the remainder of the line was already given to another node (giving it
    #   to every node in the line wouldn't be conceptually wrong, but not DRY);
    # - the node is the Module: it has the remaining comments, but since its
    #   first line is "1" any comment on the first line would wrongly show as
    #   a sameline comment for the module.
    if (not line
            or line in self._sameline_added_noops
            or nodedict["ast_type"] == 'Module'):
        return []

    run: List[Token] = []
    for tok in self._all_lines[line - 1]:
        if tok.name in NOOP_TOKENS_LINE:
            run.append(tok)
        else:
            # A significant token restarts the trailing run
            run = []

    if not run:
        return []

    self._sameline_added_noops.add(line)
    if run[-1].value == '\n':
        # The ending newline is implicit
        return run[:-1]
    return run
def previous_nooplines(self, nodedict: Node) -> Tuple[List[str], int, int, int]:
    """
    Return the comment and blank lines preceding the node.

    self._current_line is advanced up to the "lineno" of the node, collecting
    the truthy entries of self.astmissing_lines found on the way.

    Returns a tuple with: the collected lines (right-stripped and ended with a
    newline), the 1-based number of the first collected line, the 1-based
    number of the last one and the end column of the last one. The three
    numbers are -1 when nothing was collected.
    """
    collected: List[str] = []
    first_line = last_line = last_col = -1

    target = nodedict.get('lineno')
    if not target or not self.astmissing_lines:
        return collected, first_line, last_line, last_col

    while self._current_line < target:
        index = self._current_line
        tok = self.astmissing_lines[index]
        if tok:
            collected.append(tok.rawvalue.rstrip() + '\n')
            # The start is taken only from the first noop line; the end is
            # overwritten on every iteration so the last noop line wins
            if first_line == -1:
                first_line = index + 1
            last_line = index + 1
            last_col = tok.end.col
        self._current_line += 1

    return collected, first_line, last_line, last_col
def visit_arguments(self, node: Node) -> Node:
    """
    Convert the very odd Python's argument node organization (several different
    lists for each type and each type's default arguments that you have to
    right-match) into a more common in other languages single list of typed
    arguments with default values as children of their arg.

    Also convert Python2's "Name" types inside the arguments to "arg", and
    accept Python2's "vararg"/"kwarg" entries, which are plain strings instead
    of nodes.

    The node is modified in place and returned: its 'defaults', 'kw_defaults',
    'args', 'kwonlyargs', 'kwarg' and 'vararg' entries are removed and "args"
    is set to the normalized list, ordered as positional arguments, keyword
    only arguments, kwarg and vararg.
    """

    def match_default_args(args: List[Node], defaults: List[Node]) -> List[Node]:
        # Defaults are right-matched: they belong to the last arguments
        if defaults:
            lendiff = len(args) - len(defaults)
            for arg, default in zip(args[lendiff:], defaults):
                arg["default"] = self.visit(default)
        return args

    def _str2node(s: str) -> dict:
        # some nodes in Python2 AST are strings instead of objects
        # convert to same format
        return {
            "arg": s,
            "annotation": None,
            # placeholder positions, expected to be fixed later from the tokens
            "lineno": 1,
            "end_lineno": 1,
            "col_offset": 0,
            "end_col_offset": 0
        }

    def name2arg(node: Node):
        # Normalize Python2 and 3 argument types
        if node["ast_type"] == "Name":
            node["ast_type"] = "arg"

        id_ = node.get("id")
        if id_:
            node["@token"] = node["id"]
            del node["id"]

    norm_args: List[Node] = []

    normal_args = deepcopy(node.get("args"))
    if normal_args:
        defaults = deepcopy(node.get("defaults"))
        match_default_args(normal_args, defaults)
        for i in normal_args:
            norm_args.append(self.visit(i))

    kwonly_args = deepcopy(node.get("kwonlyargs"))
    if kwonly_args:
        kw_defaults = deepcopy(node.get("kw_defaults"))
        match_default_args(kwonly_args, kw_defaults)
        for a in kwonly_args:
            a["ast_type"] = "kwonly_arg"
        for i in kwonly_args:
            norm_args.append(self.visit(i))

    kwarg = deepcopy(node.get("kwarg"))
    if kwarg:
        if isinstance(kwarg, str):
            # Python2 kwargs are just strings; convert to same format
            # as Python3 (item assignment on the str would raise TypeError)
            kwarg = _str2node(kwarg)
        kwarg["ast_type"] = "kwarg"
        norm_args.append(self.visit(kwarg))

    vararg = deepcopy(node.get("vararg"))
    if vararg:
        if isinstance(vararg, str):
            # Python2 varargs are just strings; convert to same format
            # as Python3
            vararg = _str2node(vararg)
        vararg["ast_type"] = "vararg"
        norm_args.append(self.visit(vararg))

    for k in ('defaults', 'kw_defaults', 'args', 'kwonlyargs', 'kwarg', 'vararg'):
        if k in node:
            del node[k]

    for n in norm_args:
        # Python3 keeps the name in "arg", Python2 "Name" nodes in "id"
        # (handled by name2arg): both end as "@token"
        if "arg" in n:
            n["@token"] = n["arg"]
            del n["arg"]
        name2arg(n)

    node["args"] = norm_args
    return node
def add_noops(self, node: Node, isRoot: bool) -> None:
    """
    Attach to the node the noops (whitespace and comments) related to it.

    Nothing is done when the node is not a dict. Otherwise the node can get:
    - 'noops_previous': the noop lines returned by self.previous_nooplines;
    - 'noops_sameline': the noops returned by self.sameline_remainder_noops;
    - 'noops_remainder': only for the root node, the noops returned by
      self.remainder_noops.
    Each key is only added when there is something to store in it.
    """
    if not isinstance(node, dict):
        return

    def _noop_block(kind: str, first: int, last: int, last_col: int,
                    raw_lines: List[str]) -> Node:
        # Blank lines get no NoopLine entry but they still use a line number
        entries: List[Node] = []
        for offset, raw in enumerate(raw_lines):
            if raw == '\n':
                continue
            entries.append({
                "ast_type": "NoopLine",
                "noop_line": raw,
                "lineno": first + offset,
                "col_offset": 1,
            })
        return {
            "ast_type": kind,
            "lineno": first,
            "col_offset": 1,
            "end_lineno": last,
            "end_col_offset": max(last_col, 1),
            "lines": entries,
        }

    # Add all the noop (whitespace and comments) lines between the
    # last node and this one
    before, first, last, last_col = self.previous_nooplines(node)
    if before:
        node['noops_previous'] = _noop_block("PreviousNoops", first, last,
                                             last_col, before)

    # Other noops at the end of its significant line except the implicit
    # finishing newline
    same_line: List[Token] = [
        tok for tok in self.sameline_remainder_noops(node) if tok
    ]
    if same_line:
        own_line = node.get("lineno", 0)
        node['noops_sameline'] = {
            "ast_type": "SameLineNoops",
            "lineno": own_line,
            "col_offset": same_line[0].start.col,
            "noop_lines": [
                {"ast_type": "NoopSameLine", "s": tok.value.strip()}
                for tok in same_line
            ],
            "end_lineno": own_line,
            "end_col_offset": max(same_line[-1].end.col, 1),
        }

    # Finally, if this is the root node, add all noops after the last op node
    if not isRoot:
        return
    after, first, last, last_col = self.remainder_noops()
    if after:
        node['noops_remainder'] = _noop_block("RemainderNoops", first, last,
                                              last_col, after)
def visit_arguments(self, node: Node) -> Node:
    """
    Flatten Python's argument node into a single list of arguments.

    Python keeps one list per kind of argument plus separate lists with their
    default values (to be right-matched with the arguments). This replaces all
    of them with a single "args" list, as is more common in other languages,
    where every argument carries its own "default" child. The list is ordered
    as positional arguments, keyword only arguments, kwarg and vararg.

    Python2 differences are normalized too: "Name" nodes used as arguments
    become "arg" nodes and the plain-string "kwarg"/"vararg" are converted to
    nodes. The name of every argument ends in its "@token" key.

    The node is modified in place and returned.
    """

    def _attach_defaults(targets: List[Node], values: List[Node]) -> List[Node]:
        # The defaults correspond to the last len(values) targets
        if not values:
            return targets
        offset = len(targets) - len(values)
        for position, target in enumerate(targets[offset:]):
            target["default"] = self.visit(values[position])
        return targets

    def _from_py2_string(name: str) -> dict:
        # some nodes in Python2 AST are strings instead of objects
        # convert to same format
        placeholder = {"arg": name, "annotation": None}
        # the tokenizer will fix the positions later
        placeholder.update({
            "lineno": 1,
            "end_lineno": 1,
            "col_offset": 0,
            "end_col_offset": 0
        })
        return placeholder

    def _as_arg(item: Node):
        # Normalize Python2 and 3 argument types
        if item["ast_type"] == "Name":
            item["ast_type"] = "arg"
        if item.get("id"):
            item["@token"] = item.pop("id")

    collected: List[Node] = []

    positional = deepcopy(node.get("args"))
    if positional:
        _attach_defaults(positional, deepcopy(node.get("defaults")))
        collected.extend(self.visit(item) for item in positional)

    keyword_only = deepcopy(node.get("kwonlyargs"))
    if keyword_only:
        _attach_defaults(keyword_only, deepcopy(node.get("kw_defaults")))
        # All of them are retyped before the first one is visited
        for item in keyword_only:
            item["ast_type"] = "kwonly_arg"
        collected.extend(self.visit(item) for item in keyword_only)

    # The key of these single arguments is also the type given to them
    for special in ("kwarg", "vararg"):
        extra = deepcopy(node.get(special))
        if not extra:
            continue
        if isinstance(extra, str):
            # In Python2 these are just strings; convert to same format
            # as Python3
            extra = _from_py2_string(extra)
        extra["ast_type"] = special
        collected.append(self.visit(extra))

    for stale in ('defaults', 'kw_defaults', 'args', 'kwonlyargs', 'kwarg',
                  'vararg'):
        node.pop(stale, None)

    for item in collected:
        if "arg" in item:
            item["@token"] = item.pop("arg")
        _as_arg(item)

    node["args"] = collected
    return node