def visit_Assign(self, node: Assign):
    """Emit the tokens and edges for an assignment statement.

    A right-hand side that looks like a two-argument call to a function
    named ``NewType`` is additionally registered as a type alias in the
    type graph. Every target is then visited (``Name``/``Attribute``
    targets go through the variable-like path so they can carry the type
    comment), linked to the previous target with a NEXT edge and to the
    value with a COMPUTED_FROM edge. Finally ``=`` and the value itself
    are emitted.
    """
    is_new_type_call = (
        hasattr(node, "value")
        and hasattr(node.value, "func")
        and hasattr(node.value.func, "id")
        and node.value.func.id == "NewType"
        and hasattr(node.value, "args")
        and len(node.value.args) == 2
    )
    if is_new_type_call:
        alias_node, aliased_node = node.value.args
        self.__type_graph.add_type_alias(
            parse_type_annotation_node(alias_node),
            parse_type_annotation_node(aliased_node))

    # TODO: Type aliases are of the form Vector=List[float] how do we parse these?

    # A type comment is only expected on single-target assignments.
    assert node.type_comment is None or len(node.targets) == 1

    all_targets = node.targets
    for position, target in enumerate(all_targets):
        if position > 0:
            # Separator between consecutive targets.
            self.add_terminal(TokenNode(','))

        if isinstance(target, (Attribute, Name)):
            if node.type_comment is None:
                comment_annotation = None
            else:
                comment_annotation = parse_type_comment(node.type_comment)
            self.__visit_variable_like(target,
                                       target.lineno,
                                       target.col_offset,
                                       can_annotate_here=True,
                                       type_annotation=comment_annotation)
        else:
            self.visit(target)

        if position > 0:
            self._add_edge(all_targets[position - 1], target, EdgeType.NEXT)
        self._add_edge(target, node.value, EdgeType.COMPUTED_FROM)

    self.add_terminal(TokenNode('='))
    self.visit(node.value)
def visit_FormattedValue(self, node: FormattedValue):
    """Emit a formatted value as ``f" <value> [: <format_spec>] "``.

    The value is always visited; the ``:`` token and the format spec are
    only emitted when ``node.format_spec`` is present.
    """
    self.add_terminal(TokenNode('f"'))
    self.visit(node.value)

    spec = node.format_spec
    if spec is not None:
        self.add_terminal(TokenNode(':'))
        self.visit(spec)

    self.add_terminal(TokenNode('"'))
def visit_Raise(self, node: Raise):
    """Emit a ``raise`` statement and link it to the current return scope.

    A RETURNS_TO edge is added from the statement to ``self.__return_scope``
    before any token is emitted. The exception expression and the
    ``from <cause>`` part are each emitted only when present.
    """
    self._add_edge(node, self.__return_scope, EdgeType.RETURNS_TO)
    self.add_terminal(TokenNode('raise'))

    raised = node.exc
    if raised is not None:
        self.visit(raised)

    chained_from = node.cause
    if chained_from is not None:
        self.add_terminal(TokenNode('from'))
        self.visit(chained_from)
def __sequence_datastruct_visit(self, node, open_brace: str,
                                close_brace: str):
    """Emit a bracketed sequence literal built from ``node.elts``.

    Args:
        node: A node exposing its elements as ``node.elts``.
        open_brace: Token text emitted before the elements.
        close_brace: Token text emitted after the elements.
    """
    self.add_terminal(TokenNode(open_brace))
    for item in node.elts:
        self.visit(item)
        # A comma follows every element, including the last one: that is
        # always valid and is needed for one-element tuples.
        self.add_terminal(TokenNode(','))
    self.add_terminal(TokenNode(close_brace))
def visit_While(self, node: While):
    """Emit a ``while`` loop: the keyword, the test, the body block and,
    only when the loop actually has one, the ``else`` block.
    """
    self.add_terminal(TokenNode('while'))
    self.visit(node.test)
    self.__visit_statement_block(node.body)

    # ``orelse`` is an (often empty) list of statements, not ``None``, so a
    # truthiness check is required to avoid emitting a spurious ``else``
    # followed by an empty block. This matches how visit_Try treats orelse.
    if node.orelse:
        self.add_terminal(TokenNode('else'))
        self.__visit_statement_block(node.orelse)
def __visit_with(self, node: Union[With, AsyncWith], is_asyc: bool):
    """Emit a (possibly async) ``with`` statement.

    Args:
        node: The ``With`` / ``AsyncWith`` node.
        is_asyc: When true, an ``async`` token precedes ``with``.
    """
    # TODO: There is a type comment here! node.type_comment
    if is_asyc:
        self.add_terminal(TokenNode('async'))
    self.add_terminal(TokenNode('with'))

    for position, with_item in enumerate(node.items):
        if position > 0:
            # Comma between consecutive context-manager items.
            self.add_terminal(TokenNode(','))
        self.visit(with_item)

    self.__visit_statement_block(node.body)
def visit_Lambda(self, node: Lambda):
    """Emit a ``lambda`` expression inside its own symbol-table scope.

    The ``lambda`` token is always emitted. Entering the child scope and
    visiting the arguments/body is best effort: a ``ValueError`` raised
    anywhere in that part is swallowed (symtable acts oddly for nested
    lambdas), in which case the remaining tokens of the lambda are skipped.
    """
    self.add_terminal(TokenNode('lambda'))

    # Snapshot the scope stack so that a swallowed ValueError cannot leave
    # it in a corrupted state (e.g. with the lambda scope still pushed),
    # which would silently break symbol lookups for the rest of the file.
    scopes_before = list(self.__scope_symtable)
    try:
        self.__enter_child_symbol_table('function', 'lambda', node.lineno)
        self.visit(node.args)
        self.add_terminal(TokenNode(':'))
        self.visit(node.body)
        self.__scope_symtable.pop()
    except ValueError:
        # In the rare case of nested lambdas symtable acts odd...
        # Restore the stack in place to exactly what it was on entry.
        self.__scope_symtable[:] = scopes_before
def visit_Slice(self, node):
    """Emit a slice as ``[ <lower> : <upper> [: <step>] ]``.

    ``lower`` and ``upper`` are each optional; the second ``:`` is only
    emitted together with a ``step``.
    """
    # NOTE(review): visit_Subscript also emits '[' and ']' around its slice,
    # so a sliced subscript appears to get two bracket pairs -- confirm this
    # is intended before changing either side.
    self.add_terminal(TokenNode('['))

    if node.lower is not None:
        self.visit(node.lower)

    self.add_terminal(TokenNode(':'))

    if node.upper is not None:
        self.visit(node.upper)

    stride = node.step
    if stride is not None:
        self.add_terminal(TokenNode(':'))
        self.visit(stride)

    self.add_terminal(TokenNode(']'))
def visit_Dict(self, node):
    """Emit a dict literal as ``{ key : value , ... }``.

    Keys and values are paired positionally. A key that is ``None`` is
    emitted as the literal token ``None`` instead of being visited.
    """
    self.add_terminal(TokenNode('{'))

    for position, (key, value) in enumerate(zip(node.keys, node.values)):
        if position > 0:
            # Comma between consecutive entries (none after the last).
            self.add_terminal(TokenNode(','))

        if key is not None:
            self.visit(key)
        else:
            self.add_terminal(TokenNode('None'))
        self.add_terminal(TokenNode(':'))
        self.visit(value)

    self.add_terminal(TokenNode('}'))
def __visit_statement_block(self, stmts: List):
    """Emit an indented block of statements.

    The block is wrapped in INDENT/DEDENT tokens, statements are separated
    by NLINE tokens, and each statement is linked to its predecessor with
    a NEXT edge.

    Args:
        stmts: The statements of the block, in source order.
    """
    # Skip ":" since it is implied
    self.add_terminal(TokenNode(self.INDENT))

    final_position = len(stmts) - 1
    for position, statement in enumerate(stmts):
        self.visit(statement)
        if position != final_position:
            self.add_terminal(TokenNode(self.NLINE))
        if position >= 1:
            self._add_edge(stmts[position - 1],
                           statement,
                           edge_type=EdgeType.NEXT)

    self.add_terminal(TokenNode(self.DEDENT))
def __get_symbol_for_name(self, name, lineno, col_offset):
    """Emit the token(s) for a name or attribute and resolve its symbol.

    Args:
        name: Either an identifier string or an ``Attribute`` node.
        lineno: Line number used for the token of a string name.
        col_offset: Column offset used for the token of a string name.

    Returns:
        A ``(name, node, symbol, symbol_type)`` tuple. For a string name,
        ``node`` is the emitted token and ``symbol`` is the result of the
        first successful scope lookup, or ``None`` if no scope knows the
        name. For an attribute, ``node`` is the attribute node itself and
        ``symbol`` is a ``StrSymbol`` for ``X.Y`` / ``X.Y.Z`` shapes, else
        ``None``. ``symbol_type`` is one of ``'variable'``,
        ``'class-or-function'``, ``'parameter'``, ``'imported'`` or ``None``.
    """
    if isinstance(name, str):
        node = TokenNode(name, lineno, col_offset)
        self.add_terminal(node)

        # Inside a class scope, names of the form __x (but not __x__) are
        # looked up under their mangled form _<ClassName>__x.
        innermost_scope = self.__scope_symtable[-1]
        looks_private = name.startswith('__') and not name.endswith('__')
        if innermost_scope.get_type() == 'class' and looks_private:
            name = '_' + innermost_scope.get_name() + name

        # Search from the innermost scope outwards.
        for scope in reversed(self.__scope_symtable):
            try:
                symbol = scope.lookup(name)
            except KeyError:
                continue
            break
        else:
            logging.warning(
                f'Symbol "{name}"@{lineno}:{col_offset} Not Found!')
            symbol = None
    else:
        node = name
        assert isinstance(node, Attribute)

        self.visit(node.value)
        self.add_terminal(TokenNode('.', node.lineno, node.col_offset))
        self.add_terminal(
            TokenNode(node.attr, node.lineno, node.col_offset))

        # Heuristic: create symbols only for attributes of the form X.Y and X.Y.Z
        owner = node.value
        symbol = None
        if isinstance(owner, Name):
            name = f'{owner.id}.{node.attr}'
            symbol = StrSymbol(name)
        elif isinstance(owner, Attribute) and isinstance(owner.value, Name):
            name = f'{owner.value.id}.{owner.attr}.{node.attr}'
            symbol = StrSymbol(name)

    # Classify what was found.
    if isinstance(symbol, StrSymbol):
        symbol_type = 'variable'
    elif not isinstance(symbol, Symbol):
        symbol_type = None
    elif symbol.is_namespace():
        symbol_type = 'class-or-function'
    elif symbol.is_parameter():
        symbol_type = 'parameter'
    elif symbol.is_imported():
        symbol_type = 'imported'
    else:
        symbol_type = 'variable'

    return name, node, symbol, symbol_type
def __visit_for(self, node, is_async: bool):
    """Emit a (possibly async) ``for`` loop.

    Emits ``[async] for <target> in <iter>``, adds a COMPUTED_FROM edge
    from the target to the iterable, then the body block and, only when
    the loop actually has one, the ``else`` block.

    Args:
        node: The for-loop node (``target``, ``iter``, ``body``, ``orelse``).
        is_async: When true, an ``async`` token precedes ``for``.
    """
    if is_async:
        self.add_terminal(TokenNode('async'))
    self.add_terminal(TokenNode('for'))
    self.visit(node.target)
    self.add_terminal(TokenNode('in'))
    self.visit(node.iter)
    self._add_edge(node.target, node.iter, EdgeType.COMPUTED_FROM)
    self.__visit_statement_block(node.body)

    # ``orelse`` is an (often empty) list of statements, not ``None``, so a
    # truthiness check is required to avoid emitting a spurious ``else``
    # followed by an empty block. This matches how visit_Try treats orelse.
    if node.orelse:
        self.add_terminal(TokenNode('else'))
        self.__visit_statement_block(node.orelse)
def visit_comprehension(self, node: comprehension):
    """Emit one ``[async] for <target> in <iter> [if <cond>]*`` clause.

    The iterable is visited with the innermost symbol table temporarily
    removed from the scope stack; the table is pushed back afterwards.
    """
    if node.is_async:
        self.add_terminal(TokenNode('async'))
    self.add_terminal(TokenNode('for'))
    self.visit(node.target)
    self.add_terminal(TokenNode('in'))

    # Visit the iterable one scope level up, then restore the inner scope.
    comprehension_scope = self.__scope_symtable.pop()
    self.visit(node.iter)
    self.__scope_symtable.append(comprehension_scope)

    for condition in node.ifs:
        self.add_terminal(TokenNode('if'))
        self.visit(condition)
def visit_Try(self, node: Try):
    """Emit a ``try`` statement with its handlers and optional
    ``else`` / ``finally`` blocks.

    Consecutive exception handlers are linked with NEXT edges.
    """
    self.add_terminal(TokenNode('try'))
    self.__visit_statement_block(node.body)

    handlers = node.handlers
    for position, handler in enumerate(handlers):
        self.visit(handler)
        if position >= 1:
            self._add_edge(handlers[position - 1], handler, EdgeType.NEXT)

    # Both trailing blocks are emitted only when they are non-empty.
    if node.orelse:
        self.add_terminal(TokenNode('else'))
        self.__visit_statement_block(node.orelse)

    if node.finalbody:
        self.add_terminal(TokenNode('finally'))
        self.__visit_statement_block(node.finalbody)
def visit_SetComp(self, node):
    """Emit a set comprehension ``{ <elt> <generators...> }`` inside its
    own child symbol table, which is popped again on exit.
    """
    self.__enter_child_symbol_table('function', 'setcomp', node.lineno)
    try:
        self.add_terminal(TokenNode('{'))
        self.visit(node.elt)

        for position, generator in enumerate(node.generators):
            if position == 0:
                self.visit(generator)
                continue

            # For every generator after the first, the current top of the
            # scope stack is pushed a second time around the visit, so the
            # generator's iterable (visited with the top entry removed) is
            # still looked up in the setcomp symbol table.
            self.__scope_symtable.append(self.__scope_symtable[-1])
            self.visit(generator)
            self.__scope_symtable.pop()

        self.add_terminal(TokenNode('}'))
    finally:
        self.__scope_symtable.pop()
def visit_Nonlocal(self, node: Nonlocal):
    """Emit a ``nonlocal`` statement.

    Each declared name is visited as a variable at the statement's own
    position and is marked as not annotatable here.
    """
    self.add_terminal(TokenNode('nonlocal'))
    for declared_name in node.names:
        self.__visit_variable_like(
            declared_name,
            node.lineno,
            node.col_offset,
            can_annotate_here=False,
        )
def __add_subtoken_of_edges(self):
    """Link every identifier-like node to nodes for its subtokens.

    Each identifier-like key of ``self.__node_to_id`` is split with
    ``split_identifier_into_parts``; every part other than ``'_'`` gets one
    shared ``TokenNode`` and a SUBTOKEN_OF edge from that subtoken node to
    the identifier node.
    """

    def is_identifier_node(candidate):
        """Return True for str/TokenNode values that spell a non-keyword
        identifier and are not one of the layout marker tokens."""
        if not isinstance(candidate, (str, TokenNode)):
            return False
        text = str(candidate)
        if not self.IDENTIFER_REGEX.fullmatch(text):
            return False
        if keyword.iskeyword(text):
            return False
        is_layout_marker = (candidate == self.INDENT
                            or candidate == self.DEDENT
                            or candidate == self.NLINE)
        return not is_layout_marker

    # Collected up front, before any edge is added.
    all_identifier_like_nodes: Set[TokenNode] = {
        candidate
        for candidate in self.__node_to_id
        if is_identifier_node(candidate)
    }

    # One shared node per distinct subtoken string.
    subtoken_nodes: Dict[str, TokenNode] = {}
    for identifier_node in all_identifier_like_nodes:
        for part in split_identifier_into_parts(str(identifier_node)):
            if part == '_':
                continue
            if part not in subtoken_nodes:
                subtoken_nodes[part] = TokenNode(part)
            self._add_edge(subtoken_nodes[part], identifier_node,
                           EdgeType.SUBTOKEN_OF)
def visit_Call(self, node: Call):
    """Emit a call as ``<func> ( <args and keywords, comma separated> )``.

    Positional arguments are visited first, then keyword arguments; commas
    appear only between arguments, never after the last one.
    """
    self.visit(node.func)
    self.add_terminal(TokenNode('('))

    every_argument = [*node.args, *node.keywords]
    for position, argument in enumerate(every_argument):
        if position > 0:
            self.add_terminal(TokenNode(','))
        self.visit(argument)

    self.add_terminal(TokenNode(')'))
def visit_ExceptHandler(self, node):
    """Emit an ``except`` clause: the keyword, the optional exception type,
    the optional bound name (visited as a non-annotatable variable at the
    handler's position) and the handler body block.
    """
    self.add_terminal(TokenNode('except'))

    if node.type:
        self.visit(node.type)

    if node.name:
        self.__visit_variable_like(
            node.name,
            node.lineno,
            node.col_offset,
            can_annotate_here=False,
        )

    self.__visit_statement_block(node.body)
def visit_AugAssign(self, node: AugAssign):
    """Emit an augmented assignment ``<target> <op>= <value>``.

    ``Name``/``Attribute`` targets are visited as non-annotatable
    variables at the statement's position; other targets are visited
    generically. The target is linked to the value with a COMPUTED_FROM
    edge before the operator token is emitted.
    """
    target = node.target
    if isinstance(target, (Name, Attribute)):
        self.__visit_variable_like(
            target,
            node.lineno,
            node.col_offset,
            can_annotate_here=False,
        )
    else:
        self.visit(target)

    self._add_edge(node.target, node.value, EdgeType.COMPUTED_FROM)

    # The operator token is the binary operator's symbol followed by '='.
    operator_symbol = self.BINOP_SYMBOLS[type(node.op)]
    self.add_terminal(TokenNode(operator_symbol + '='))
    self.visit(node.value)
def visit_ClassDef(self, node):
    """Register a class in the type graph and emit its definition.

    The class is registered with the canonicalized annotations of its
    bases; when it has no bases it is registered a second time with
    ``object`` as its parent. Decorators, the ``class`` keyword, the class
    name and the parenthesised base list (if any) are emitted, and the body
    is visited inside a child ``'class'`` symbol table that is popped
    again on exit.
    """
    # Add class inheritance (if any)
    parent_annotations = [
        self.__type_graph.canonicalize_annotation(
            parse_type_annotation_node(parent), self.__imported_symbols)
        for parent in node.bases
    ]
    self.__type_graph.add_class(node.name, parent_annotations)
    if not node.bases:
        self.__type_graph.add_class(node.name,
                                    [parse_type_annotation_node('object')])

    for decorator in node.decorator_list:
        self.add_terminal(TokenNode('@'))
        self.visit(decorator)

    self.add_terminal(TokenNode('class'))
    self.add_terminal(TokenNode(node.name, node.lineno, node.col_offset))

    if node.bases:
        self.add_terminal(TokenNode('('))
        for position, base in enumerate(node.bases):
            if position > 0:
                self.add_terminal(TokenNode(','))
            self.visit(base)
        self.add_terminal(TokenNode(')'))

    self.__enter_child_symbol_table('class', node.name, node.lineno)
    try:
        self.__visit_statement_block(node.body)
    finally:
        self.__scope_symtable.pop()
def visit_AnnAssign(self, node: AnnAssign):
    """Emit an annotated assignment.

    The target is visited as an annotatable variable carrying the parsed
    annotation. When a value is present, ``=`` and the value are emitted
    and the target is linked to the value with a COMPUTED_FROM edge.
    """
    target = node.target
    declared_type = parse_type_annotation_node(node.annotation)
    self.__visit_variable_like(
        target,
        target.lineno,
        target.col_offset,
        can_annotate_here=True,
        type_annotation=declared_type,
    )

    if node.value is None:
        return

    self.add_terminal(TokenNode('='))
    self.visit(node.value)
    self._add_edge(node.target, node.value, EdgeType.COMPUTED_FROM)
def __visit_function(self, node: Union[FunctionDef, AsyncFunctionDef],
                     is_async: bool):
    """Emit a (possibly async) function definition.

    Decorators, ``[async] def`` and the function name are emitted in the
    enclosing scope; the name is visited as an annotatable variable whose
    annotation is the return type (from ``node.returns`` or, failing that,
    from the part of the type comment after ``->``). The arguments and
    the body are visited inside a child ``'function'`` symbol table, with
    ``self.__return_scope`` pointing at this function while the body is
    visited. Both are restored on exit.

    Args:
        node: The function definition node.
        is_async: When true, an ``async`` token precedes ``def``.
    """
    for decorator in node.decorator_list:
        self.add_terminal(TokenNode('@'))
        self.visit(decorator)

    if is_async:
        self.add_terminal(TokenNode('async'))
    self.add_terminal(TokenNode('def'))

    return_annotation = None
    if node.returns is not None:
        return_annotation = parse_type_annotation_node(node.returns)
    elif node.type_comment is not None and "->" in node.type_comment:
        # TODO: Add support for argument types
        # Only the text after the last "->" is parsed as the return type.
        return_text = node.type_comment.rsplit("->", 1)[-1].strip()
        return_annotation = parse_type_comment(return_text)

    self.__visit_variable_like(node.name,
                               node.lineno,
                               node.col_offset,
                               can_annotate_here=True,
                               type_annotation=return_annotation)

    enclosing_return_scope = self.__return_scope
    self.__enter_child_symbol_table('function', node.name, node.lineno)
    try:
        self.add_terminal(TokenNode('('))
        self.visit(node.args)
        self.add_terminal(TokenNode(')'))

        self.__return_scope = node
        self.__visit_statement_block(node.body)
    finally:
        self.__return_scope = enclosing_return_scope
        self.__scope_symtable.pop()
def visit_arguments(self, node: arguments):
    """Emit a function's parameter list.

    Order of emission: the ``args`` (each followed by a comma and linked
    to its predecessor with a NEXT edge), ``*vararg``, ``**kwarg`` and
    finally, behind a ``* ,`` marker, the keyword-only arguments. Default
    values are visited with the innermost symbol table temporarily removed
    from the scope stack, and each defaulted argument is linked to its
    default with a COMPUTED_FROM edge.
    """
    # NOTE(review): ``node.posonlyargs`` is never read here; confirm whether
    # positional-only parameters need handling for the ast version in use.

    def emit_default(argument, default):
        """Emit ``= <default>``, visiting the default one scope level up."""
        self.add_terminal(TokenNode('='))
        function_scope = self.__scope_symtable.pop()
        self.visit(default)
        self.__scope_symtable.append(function_scope)
        self._add_edge(argument, default, EdgeType.COMPUTED_FROM)

    # Left-pad the defaults with None so they pair up with the last args.
    missing = len(node.args) - len(node.defaults)
    padded_defaults = [None] * missing + node.defaults
    for position, (argument, default) in enumerate(
            zip(node.args, padded_defaults)):
        self.visit(argument)
        if default is not None:
            emit_default(argument, default)
        self.add_terminal(TokenNode(','))
        if position >= 1:
            self._add_edge(node.args[position - 1], argument, EdgeType.NEXT)

    if node.vararg is not None:
        self.add_terminal(TokenNode('*'))
        self.visit(node.vararg)
        self.add_terminal(TokenNode(','))

    if node.kwarg is not None:
        self.add_terminal(TokenNode('**'))
        self.visit(node.kwarg)

    if node.kwonlyargs:
        self.add_terminal(TokenNode('*'))
        self.add_terminal(TokenNode(','))

        missing_kw = len(node.kwonlyargs) - len(node.kw_defaults)
        padded_kw_defaults = [None] * missing_kw + node.kw_defaults
        for argument, default in zip(node.kwonlyargs, padded_kw_defaults):
            self.visit(argument)
            if default is not None:
                emit_default(argument, default)
            self.add_terminal(TokenNode(','))
def visit_UnaryOp(self, node):
    """Emit a unary operation: the operator's symbol (looked up by the
    operator's type in ``UNARYOP_SYMBOLS``) followed by the operand.
    """
    operator_symbol = self.UNARYOP_SYMBOLS[type(node.op)]
    self.add_terminal(TokenNode(operator_symbol))
    self.visit(node.operand)
def visit_Starred(self, node: Starred):
    """Emit a starred expression: a ``*`` token followed by its value."""
    star = TokenNode('*')
    self.add_terminal(star)
    self.visit(node.value)
def visit_Subscript(self, node: Subscript):
    """Emit a subscript as ``<value> [ <slice> ]``."""
    subscripted, index = node.value, node.slice
    self.visit(subscripted)
    self.add_terminal(TokenNode('['))
    self.visit(index)
    self.add_terminal(TokenNode(']'))
def visit_Pass(self, node):
    """Emit a ``pass`` statement as a single token; ``node`` is not read."""
    pass_token = TokenNode('pass')
    self.add_terminal(pass_token)
def visit_ExtSlice(self, node: ExtSlice):
    """Emit the dimensions of an extended slice, separated by commas."""
    for position, dimension in enumerate(node.dims):
        if position > 0:
            self.add_terminal(TokenNode(','))
        self.visit(dimension)
def visit_Ellipsis(self, node):
    """Emit an ellipsis as a single ``...`` token; ``node`` is not read."""
    ellipsis_token = TokenNode('...')
    self.add_terminal(ellipsis_token)