def get_names_lengths(tree, is_ast3=False):
    """Return the length of every ``id`` found while walking ``tree``.

    :param tree: Root AST node to walk.
    :param is_ast3: Walk with ``ast3`` when true, with ``ast27`` otherwise.
    :return: A list of ``len(node.id)`` values, in walk order, for every
        node that exposes an ``id`` attribute.
    """
    walker = ast3.walk if is_ast3 else ast27.walk
    return [len(node.id) for node in walker(tree) if hasattr(node, 'id')]
def get_slices_count(tree, is_ast3=False):
    """Count the nodes in ``tree`` whose class is exactly ``Slice``.

    :param tree: Root AST node to walk.
    :param is_ast3: Use the ``ast3`` walker and ``Slice`` class when true,
        the ``ast27`` ones otherwise.
    :return: Number of matching nodes.
    """
    ast_module = ast3 if is_ast3 else ast27
    slice_class = ast_module.Slice
    return sum(1 for node in ast_module.walk(tree)
               if node.__class__ == slice_class)
def filter_ast(self, startingNode, objectType):
    """
    Auxiliary method that walks an AST from a starting node (the node itself
    and all of its descendants) and keeps only the nodes of one exact type.

    :param startingNode: The node you want to start from. Should be an AST object.
    :param objectType: The type you want to look for. Ex. _ast3.Dict
    :return: A lazy iterable with all the objects whose type is exactly
        the one you specified (subclasses do not match)
    """
    def has_requested_type(candidate):
        return type(candidate) is objectType

    return filter(has_requested_type, ast3.walk(startingNode))
def get_compreh_count(tree, is_ast3=False):
    """Count list, set and dict comprehensions in ``tree``.

    Generator expressions are not part of the counted classes.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3`` classes and walker when true, ``ast27``
        otherwise.
    :return: Number of ``ListComp``/``SetComp``/``DictComp`` nodes.
    """
    ast_module = ast3 if is_ast3 else ast27
    comprehension_classes = (
        ast_module.ListComp,
        ast_module.SetComp,
        ast_module.DictComp,
    )
    return sum(1 for node in ast_module.walk(tree)
               if node.__class__ in comprehension_classes)
def get_unique_keywords(tree, is_ast3=False):
    """Return the distinct keyword names used in ``tree``, in first-seen order.

    Each entry of the keyword table (``keywords_dict`` or ``keywords_dict3``)
    is a dict with an ``'ast_class'``, a ``'name'`` and optionally a
    ``'condition'`` callable that receives the node. An entry matches a node
    when the classes are equal and the condition (if any) is truthy.

    Entries with a falsy name are never reported. Previously such an entry
    without a ``'condition'`` key raised ``KeyError``; it is now skipped,
    matching the guard used by ``get_keywords_count``.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3`` and ``keywords_dict3`` when true, ``ast27``
        and ``keywords_dict`` otherwise.
    :return: List of unique keyword names.
    """
    if is_ast3:
        nodes, keyword_table = ast3.walk(tree), keywords_dict3
    else:
        nodes, keyword_table = ast27.walk(tree), keywords_dict

    unique_keywords = []
    for node in nodes:
        node_class = node.__class__
        for entry in keyword_table:
            if entry['ast_class'] != node_class:
                continue
            if entry['name'] in unique_keywords:
                continue
            # Only consult the condition when the entry actually has one.
            matches = 'condition' not in entry or entry['condition'](node)
            if matches and entry['name']:
                unique_keywords.append(entry['name'])
    return unique_keywords
def get_branching_factor(tree, is_ast3=False):
    """Return the average number of children per non-leaf node of ``tree``.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3`` helpers when true, ``ast27`` otherwise.
    :return: Total child count divided by the number of nodes that have at
        least one child, or ``1`` when no node has children.
    """
    ast_module = ast3 if is_ast3 else ast27
    total_children = 0
    parents = 0
    for node in ast_module.walk(tree):
        fanout = sum(1 for _ in ast_module.iter_child_nodes(node))
        if fanout:
            parents += 1
            total_children += fanout
    return total_children / parents if parents > 0 else 1
def get_functions_info(tree, is_ast3=False):
    """Summarise the ``FunctionDef`` nodes found in ``tree``.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3`` when true, ``ast27`` otherwise.
    :return: A ``FunctionsInfo`` named tuple with ``func_count`` (number of
        function definitions), ``args_count`` (``len(node.args.args)`` per
        function) and ``name_lengths`` (``len(node.name)`` per function),
        both lists in walk order.
    """
    ast_module = ast3 if is_ast3 else ast27
    function_class = ast_module.FunctionDef
    functions = [node for node in ast_module.walk(tree)
                 if isinstance(node, function_class)]

    FunctionsInfo = namedtuple('FunctionsInfo',
                               'func_count args_count name_lengths')
    return FunctionsInfo(
        func_count=len(functions),
        args_count=[len(func.args.args) for func in functions],
        name_lengths=[len(func.name) for func in functions],
    )
def _find_name(root: ast3.AST) -> Optional[str]:
    """Returns the first "name" field or ast3.Name expression in the AST.

    Nodes are visited in ``ast3.walk`` order. The first node that is a Name,
    an Attribute, a Subscript, or that carries a truthy ``name`` attribute
    decides the result; ``None`` is returned when no node qualifies.
    """
    for node in ast3.walk(root):
        if isinstance(node, ast3.Name):
            found = node.id
        elif isinstance(node, ast3.Attribute):
            # value.attr, e.g. self.x. We want attr.
            found = node.attr
        elif isinstance(node, ast3.Subscript):
            # value[slice], e.g. x[1] or x[1:2]. We want value.
            found = _find_name(node.value)
        else:
            found = getattr(node, "name", None)
            if not found:
                continue
        return found
    return None
def find_enums(tree):
    """Yield ``(module, class, target)`` for assignments with a quoted, dotted type comment.

    An assignment is reported when its ``type_comment`` contains a ``.`` and
    starts with a single quote. The surrounding quotes are stripped and the
    remainder is split at its last dot into module and class; the third item
    is the ``id`` of the assignment's only target.
    """
    for node in ast3.walk(tree):
        if not isinstance(node, ast3.Assign):
            continue
        annotation = node.type_comment
        if (annotation is None
                or "." not in annotation
                or not annotation.startswith("'")):
            continue
        mod, _, cls = annotation.strip("'").rpartition(".")
        assert len(node.targets) == 1
        target = node.targets[0]
        yield (mod, cls, target.id)  # type: ignore
def get_literals_count(tree, is_ast3=False):
    """Count the literal nodes in ``tree``.

    A node counts as a literal when it is an instance of ``Str``, ``Num``,
    ``List``, ``Dict``, ``Tuple`` or ``Set`` of the selected AST flavour.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3`` classes and walker when true, ``ast27``
        otherwise.
    :return: Number of literal nodes.
    """
    ast_module = ast3 if is_ast3 else ast27
    literal_classes = (
        ast_module.Str,
        ast_module.Num,
        ast_module.List,
        ast_module.Dict,
        ast_module.Tuple,
        ast_module.Set,
    )
    return sum(1 for node in ast_module.walk(tree)
               if isinstance(node, literal_classes))
def get_bigrams_freq(tree, is_ast3=False):
    """Return the relative frequency of every (parent class, child class) bigram.

    The bigram vocabulary is the cartesian square of ``terms_ast27`` (or
    ``terms_ast3``). For each walked node the children are taken from its
    ``body`` attribute if it has one, otherwise from its ``value`` attribute;
    a non-list attribute is treated as a single child.

    NOTE(review): a pair outside the vocabulary still increments the total
    and stops the scan of that node's remaining children. This mirrors the
    original ``except KeyError: continue`` behaviour; confirm it is intended.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3``/``terms_ast3`` when true, ``ast27``/
        ``terms_ast27`` otherwise.
    :return: List of frequencies in vocabulary order (raw zero counts when
        nothing was counted).
    """
    if is_ast3:
        vocabulary, walker = terms_ast3, ast3.walk
    else:
        vocabulary, walker = terms_ast27, ast27.walk

    frequencies = dict.fromkeys(itertools.product(vocabulary, repeat=2), 0)
    seen = 0

    for parent in walker(tree):
        if hasattr(parent, 'body'):
            children = parent.body
        elif hasattr(parent, 'value'):
            children = parent.value
        else:
            continue
        if not isinstance(children, list):
            children = [children]

        for child in children:
            seen += 1
            pair = (parent.__class__, child.__class__)
            if pair not in frequencies:
                # Unknown bigram: give up on this parent, keep the count.
                break
            frequencies[pair] += 1

    if seen > 0:
        frequencies = {pair: hits / seen for pair, hits in frequencies.items()}
    return list(frequencies.values())
def get_term_frequency(tree, is_ast3=False):
    """Compute term frequencies and presence flags for the known node classes.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3``/``terms_ast3`` when true, ``ast27``/
        ``terms_ast27`` otherwise.
    :return: A pair of lists in term order: the share of counted nodes that
        belong to each term (raw zero counts when nothing was counted), and
        a 0/1 flag telling whether the term occurred at all.
    """
    if is_ast3:
        term_dict, walker = terms_ast3, ast3.walk
    else:
        term_dict, walker = terms_ast27, ast27.walk

    occurrences = dict.fromkeys(term_dict, 0)
    presence = dict.fromkeys(term_dict, 0)
    counted = 0

    for node in walker(tree):
        node_class = node.__class__
        if node_class not in term_dict:
            continue
        counted += 1
        occurrences[node_class] += 1
        presence[node_class] = 1

    if counted > 0:
        occurrences = {term: hits / counted
                       for term, hits in occurrences.items()}
    return list(occurrences.values()), list(presence.values())
def get_keywords_count(tree, is_ast3=False):
    """Count how often each keyword of the keyword table occurs in ``tree``.

    A table entry matches a node when its ``'ast_class'`` equals the node's
    class and its optional ``'condition'`` callable accepts the node. Counts
    are accumulated per entry name.

    :param tree: Root AST node to walk.
    :param is_ast3: Use ``ast3``/``keywords_dict3`` when true, ``ast27``/
        ``keywords_dict`` otherwise.
    :return: One count per table entry, in table order (0 when unseen).
    """
    if is_ast3:
        keyword_table, walker = keywords_dict3, ast3.walk
    else:
        keyword_table, walker = keywords_dict, ast27.walk

    tally = {}
    for node in walker(tree):
        node_class = node.__class__
        for entry in keyword_table:
            if entry['ast_class'] != node_class:
                continue
            if 'condition' in entry and not entry['condition'](node):
                continue
            tally[entry['name']] = tally.get(entry['name'], 0) + 1

    return [tally.get(entry['name'], 0) for entry in keyword_table]
def _build_parents(tree: ast.AST) -> None:
    """Record, in the module-level ``_parents`` mapping, the parent of every node under ``tree``.

    The root itself gets no entry because nothing in the walk has it as a
    child.
    """
    links = (
        (parent, child)
        for parent in ast.walk(tree)
        for child in ast.iter_child_nodes(parent)
    )
    for parent, child in links:
        _parents[child] = parent
def find(tree: ast.AST, type_: Type[T]) -> Iterable[T]:
    """Lazily yields every node under ``tree`` (root included) that is an instance of ``type_``."""
    yield from (  # type: ignore
        node for node in ast.walk(tree) if isinstance(node, type_)
    )
def _find_parameters(self, endpoints):
    """Attach the view node and the request parameters it reads to each endpoint.

    For every endpoint the AST stored under ``endpoint['view_filepath']`` is
    searched for the first node whose ``name`` equals
    ``endpoint['view_name']``. That node (or ``None``) is stored as
    ``endpoint['view_context']``. The methods to inspect are the handler
    methods of a class view, or the function itself for a function view.

    The following access patterns are turned into parameters:

    * ``<name>.cleaned_data['first_name']``
    * ``<req_name>.<METHOD>["id"]`` and ``self.request.<METHOD>["id"]``
    * ``<req_name>.<METHOD>.get("id", None)`` and
      ``self.request.<METHOD>.get("id", None)``

    A view function without positional arguments no longer raises
    ``IndexError``; it simply has no request object name.

    :param endpoints: List of endpoint dicts with ``'view_filepath'`` and
        ``'view_name'`` keys. The dicts are updated in place.
    :return: The same ``endpoints`` list, each entry extended with
        ``'view_context'`` and ``'params'`` (a list of dicts with ``'name'``,
        ``'filepath'`` and ``'line_number'``).
    """
    method_names = (
        'get', 'post', 'put', 'patch', 'delete', 'head', 'options', 'trace',
        'form_valid'
    )
    http_methods = (
        'GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS', 'TRACE'
    )
    unresolved_msg = "Couldn't resolve parameter name. File '%s' line '%d'"

    def request_arg_name(method):
        """Name of the first positional argument that is not ``self``, if any."""
        positional = [argument.arg for argument in method.args.args]
        if positional and positional[0] == 'self':
            positional = positional[1:]
        return positional[0] if positional else None

    def reads_cleaned_data(node):
        """True for ``<name>.cleaned_data``."""
        return (type(node) is _ast3.Attribute
                and node.attr == 'cleaned_data'
                and type(node.value) is _ast3.Name
                and bool(node.value.id))

    def reads_request_data(node, req_name):
        """True for ``<req_name>.<METHOD>`` and ``self.request.<METHOD>``."""
        if type(node) is not _ast3.Attribute or node.attr not in http_methods:
            return False
        owner = node.value
        if type(owner) is _ast3.Name:
            return owner.id == req_name
        return (type(owner) is _ast3.Attribute
                and owner.attr == 'request'
                and type(owner.value) is _ast3.Name
                and owner.value.id == 'self')

    def as_text(value):
        # Accounting for weird bug in typed-ast library
        return value.decode("utf-8") if type(value) is bytes else value

    for endpoint in endpoints:
        filepath = endpoint['view_filepath']
        module_ast = self.processor.python_file_asts[filepath]

        # Find out if the view is a class or a method/function
        view_context = None
        for node in ast3.walk(module_ast):
            if hasattr(node, 'name') and node.name == endpoint['view_name']:
                view_context = node
                break
        endpoint['view_context'] = view_context

        # Find all the methods/functions we have to process
        if type(view_context) is _ast3.ClassDef:
            render_methods = [
                member for member in view_context.body
                if type(member) is _ast3.FunctionDef
                and member.name in method_names
            ]
        elif type(view_context) is _ast3.FunctionDef:
            render_methods = [view_context]
        else:
            render_methods = []

        params = []
        for method in render_methods:
            req_name = request_arg_name(method)

            # Subscript access: <x>.cleaned_data[...], <req>.<METHOD>[...],
            # self.request.<METHOD>[...]
            for subscript in self.processor.filter_ast(method,
                                                       _ast3.Subscript):
                target = subscript.value
                if not (reads_cleaned_data(target)
                        or reads_request_data(target, req_name)):
                    continue
                try:
                    value = ast3.literal_eval(subscript.slice.value)
                except ValueError:
                    # Happens when the parameter name is dynamically
                    # generated, e.g. <req>.GET["id" + i]
                    logger.warning(unresolved_msg, filepath,
                                   subscript.lineno)
                    continue
                params.append({
                    'name': as_text(value),
                    'filepath': filepath,
                    'line_number': subscript.lineno
                })

            # .get() access: <req>.<METHOD>.get("param_name", None),
            # self.request.<METHOD>.get("param_name", None)
            for call in self.processor.filter_ast(method, _ast3.Call):
                func = call.func
                if not (type(func) is _ast3.Attribute
                        and func.attr == 'get'
                        and reads_request_data(func.value, req_name)):
                    continue
                args = self.processor.parse_python_method_args(
                    call, ['key', 'default'])
                key = args['key']
                if isinstance(key, (bytes, str)):
                    params.append({
                        'name': as_text(key),
                        'filepath': filepath,
                        'line_number': call.lineno
                    })

        # TODO: find the templates and see if they pull params out of the
        # request object within the template
        endpoint['params'] = params
    return endpoints
def _find_module_path_from_import(self, location_found, import_object, name):
    """
    This method resolves an import to a file path

    Fixes over the previous version:

    * A star import is detected through the alias name. The old check
      compared the alias node itself with ``'*'``, which was always unequal,
      so the star-import handling never ran.
    * The star-import search compares each node name with ``name`` instead
      of subscripting the AST node.
    * ``from . import *`` (no module name) is looked up in the package's
      ``__init__.py`` instead of failing on ``None.replace``.

    Star imports that are not relative (level 0) keep using the regular
    resolution below, which is what effectively ran for them before.

    :param location_found: The path to the file that contains the import
    :param import_object: The import object
    :param name: The name of the object/method/class you want to import
    :return: The file path to the import or None if it couldn't be found
    """
    known_files = self.processor.python_file_asts
    current_dir = location_found[:location_found.rfind('/') + 1]

    def defines_name(path):
        """True when the file's AST contains a node named ``name``."""
        return any(
            hasattr(node, 'name') and node.name == name
            for node in ast3.walk(known_files[path]))

    is_star_import = any(alias.name == '*' for alias in import_object.names)

    if (is_star_import and isinstance(import_object, _ast3.ImportFrom)
            and import_object.level > 0):
        # This is an asterisk import so we have to search the module we find
        base = "/".join(
            location_found.split('/')[:-import_object.level]) + "/"
        if import_object.module:
            module = import_object.module.replace('.', '/')
            # Try module/name.py, then module.py
            candidates = [base + module + "/" + name + ".py",
                          base + module + ".py"]
        else:
            candidates = [base + "__init__.py"]

        for candidate in candidates:
            if candidate in known_files:
                # We found the file for the module, let's see if it
                # contains our import
                return candidate if defines_name(candidate) else None
        logger.warning(
            "This is most likely an import from an external library: %s",
            import_object.module)
        return None

    if isinstance(import_object, _ast3.Import):
        # Root path is current directory
        possible_path = current_dir + name.replace('.', '/') + '.py'
        if possible_path in known_files:
            return possible_path
        return None

    if isinstance(import_object, _ast3.ImportFrom):
        # Root path is module directory or current directory depending on level
        module_path = import_object.module.replace(
            '.', '/') + '/' if import_object.module else ''
        if import_object.level > 0:
            root_path = current_dir
        elif import_object.level == 0 and module_path not in location_found:
            root_path = current_dir
        elif import_object.level == 0 and module_path in location_found:
            root_path = location_found[:location_found.find(module_path)]
        else:
            logger.warning(
                "This is most likely a view from an external library: %s",
                name)
            return None

        # Try module/name.py, then module.py
        possible_path = root_path + module_path + name + '.py'
        if possible_path in known_files:
            return possible_path
        possible_path = root_path + module_path.rstrip('/') + '.py'
        if possible_path in known_files:
            return possible_path
        logger.warning(
            "This is most likely a view from an external library: %s", name)
        return None

    return None
def _find_view_context(self, endpoints):
    """Resolve, for every endpoint, the view's name and the file that defines it.

    ``endpoint['view_call']`` is either a dotted string or an AST expression.
    Strings are resolved against the first path component of
    ``endpoint['location_found']``. Expressions are resolved through the
    ``import`` / ``from ... import`` statements of the file in which the
    endpoint was declared, including ``from x import *``.

    Fix over the previous version: when a single-name view is imported under
    an alias (``from views import Home as H``), the view name is a plain
    string; renaming it used item assignment and raised ``TypeError``.

    :param endpoints: List of endpoint dicts with ``'view_call'`` and
        ``'location_found'`` keys. The dicts are updated in place.
    :return: The same list, each entry extended with ``'view_filepath'``
        (path or None) and ``'view_name'`` (dotted string).
    """
    known_files = self.processor.python_file_asts

    def use_real_name(view, alias):
        """Replace an ``as`` alias with the imported object's real name."""
        view['module'] = alias.name
        if isinstance(view['name'], str):
            view['name'] = alias.name
        else:
            # NOTE(review): for multi-part names this overwrites the first
            # remaining component with the module name, as the original
            # code did; confirm that is the intended result.
            view['name'][0] = alias.name

    for endpoint in endpoints:
        view_call = endpoint['view_call']
        location_found = endpoint['location_found']
        base_module_path = location_found.split('/', 1)[0] + '/'
        view = {
            'name': None,
            'module': [],
            'module_filepath': None,
            'declaration_loc': location_found
        }

        if type(view_call) is str:
            # Let's resolve views that are just strings
            view['name'] = view_call.split('.')[-1]
            view['module'] += [
                var for var in base_module_path.split('/') if var
            ]
            view['module'] += view_call.split('.')[:-1]
            # Try module/name.py, then module.py
            possible_path = '/'.join(
                view['module']) + '/' + view['name'] + '.py'
            if possible_path in known_files:
                view['module_filepath'] = possible_path
            else:
                possible_path = '/'.join(view['module']) + '.py'
                if possible_path in known_files:
                    view['module_filepath'] = possible_path
        else:
            # Now let's handle everything that's not a string
            temp_view_name = []
            for x in ast3.walk(view_call):
                if hasattr(x, 'id'):
                    temp_view_name.append(x.id)
                elif hasattr(x, 'attr'):
                    temp_view_name.append(x.attr)

            # Remove as_view() from name
            if 'as_view' in temp_view_name:
                temp_view_name.remove('as_view')

            # Let's reverse the view name to be in the right order
            temp_view_name = list(reversed(temp_view_name))

            # Load the value into our dictionary
            view['name'] = temp_view_name[1:] if len(
                temp_view_name) > 1 else temp_view_name[0]
            view['module'] = temp_view_name[0]

            # Load the AST of the file where the endpoint was found
            my_ast = known_files[location_found]

            # Search all the imports for the view module
            for i in self.processor.filter_ast(my_ast, _ast3.Import):
                for n in i.names:
                    if n.name == view['module']:
                        view['module_filepath'] = \
                            self._find_module_path_from_import(
                                location_found, i, view['module'])
                    elif n.asname == view['module']:
                        use_real_name(view, n)
                        view['module_filepath'] = \
                            self._find_module_path_from_import(
                                location_found, i, view['module'])

            # Search all import froms for the view module
            asterisk_imports = []
            for i in self.processor.filter_ast(my_ast, _ast3.ImportFrom):
                for n in i.names:
                    if n.name == view['module']:
                        # We found the import, let's see if we can find
                        # the file path to the module
                        view['module_filepath'] = \
                            self._find_module_path_from_import(
                                location_found, i, view['module'])
                    elif n.asname == view['module']:
                        use_real_name(view, n)
                        view['module_filepath'] = \
                            self._find_module_path_from_import(
                                location_found, i, view['module'])
                    elif n.name == "*":
                        # Keep track of the from x import *
                        asterisk_imports.append(i)

            # Check if the module is imported with an *
            if view['module_filepath'] is None:
                for i in asterisk_imports:
                    path = self._find_module_path_from_import(
                        location_found, i, view['module'])
                    if path:
                        view['module_filepath'] = path
                        break

        endpoint['view_filepath'] = view['module_filepath']
        endpoint['view_name'] = view['name'] if type(
            view['name']) is str else '.'.join(view['name'])
    return endpoints
from typed_ast import ast3

# Sample program that is only parsed, never executed.
code = """
def hello():
    name = "world"
    print(f"hello {world}")
"""

t = ast3.parse(code)
_DIVIDER = "----------------------------------------"

# 1) the module object itself, 2) its full dump, 3) every node in walk order.
print(t)
print(_DIVIDER)
print(ast3.dump(t))
print(_DIVIDER)
for node in ast3.walk(t):
    print(node)
def add_parents(tree):
    """Give every node below ``tree`` a ``parent`` attribute pointing at its direct parent.

    The root node itself is left without a ``parent`` attribute.
    """
    for parent in ast3.walk(tree):
        for child in ast3.iter_child_nodes(parent):
            setattr(child, 'parent', parent)  # type: ignore