def get_names_lengths(tree, is_ast3=False):
    """Return the length of every ``id`` attribute found while walking *tree*.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true the tree is walked with ``ast3``; otherwise
            with ``ast27``.

    Returns:
        A list with ``len(node.id)`` for each visited node that has an
        ``id`` attribute, in walk order.
    """
    walker = ast3.walk if is_ast3 else ast27.walk
    return [len(node.id) for node in walker(tree) if hasattr(node, 'id')]
def get_slices_count(tree, is_ast3=False):
    """Count the nodes in *tree* whose class is exactly ``Slice``.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` supplies both the walker and the
            ``Slice`` class; otherwise ``ast27`` does.

    Returns:
        The number of visited nodes whose ``__class__`` equals ``Slice``.
    """
    ast_mod = ast3 if is_ast3 else ast27
    slice_class = ast_mod.Slice
    return sum(
        1 for node in ast_mod.walk(tree) if node.__class__ == slice_class
    )
def get_compreh_count(tree, is_ast3=False):
    """Count list, set and dict comprehension nodes in *tree*.

    Only ``ListComp``, ``SetComp`` and ``DictComp`` are counted; no other
    node class contributes.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` supplies the walker and node classes;
            otherwise ``ast27`` does.

    Returns:
        The number of visited nodes whose class is one of the three
        comprehension classes.
    """
    ast_mod = ast3 if is_ast3 else ast27
    comprehension_classes = (
        ast_mod.ListComp,
        ast_mod.SetComp,
        ast_mod.DictComp,
    )
    return sum(
        1
        for node in ast_mod.walk(tree)
        if node.__class__ in comprehension_classes
    )
def get_unique_keywords(tree, is_ast3=False):
    """Return the distinct keyword names matched by the nodes of *tree*.

    Each entry of the keyword table is a mapping with an ``'ast_class'``,
    a ``'name'`` and optionally a ``'condition'`` callable taking the node.
    An entry matches a node when the classes are equal and either no
    condition is present or the condition returns a truthy value.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` and ``keywords_dict3`` are used;
            otherwise ``ast27`` and ``keywords_dict``.

    Returns:
        A list of matched names without duplicates, in the order they
        were first matched. Entries with a falsy name are never reported.
    """
    keyw_dict = keywords_dict3 if is_ast3 else keywords_dict
    walker = ast3.walk if is_ast3 else ast27.walk

    unique_keywords = []
    # Mirror of unique_keywords for O(1) duplicate checks.
    # NOTE(review): assumes names are hashable (strings) - confirm.
    seen = set()
    for node in walker(tree):
        node_class = node.__class__
        for entry in keyw_dict:
            if entry['ast_class'] == node_class:
                name = entry['name']
                # Skip unnamed entries up front: previously an entry with
                # no 'condition' and a falsy name fell through to the
                # condition lookup and raised KeyError.
                if not name or name in seen:
                    continue
                if 'condition' not in entry or entry['condition'](node):
                    seen.add(name)
                    unique_keywords.append(name)
    return unique_keywords
def get_branching_factor(tree, is_ast3=False):
    """Return the average number of children over nodes that have children.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` supplies ``walk`` and
            ``iter_child_nodes``; otherwise ``ast27`` does.

    Returns:
        Total child count divided by the number of nodes with at least one
        child, or ``1`` when no visited node has any child.
    """
    ast_mod = ast3 if is_ast3 else ast27

    total_children = 0
    parent_nodes = 0
    for node in ast_mod.walk(tree):
        fanout = sum(1 for _ in ast_mod.iter_child_nodes(node))
        if fanout:
            total_children += fanout
            parent_nodes += 1

    if parent_nodes == 0:
        return 1
    return total_children / parent_nodes
# Built once at module level so every call returns the same tuple type.
# A class created inside the function differs per call and cannot be
# located by pickle.
FunctionsInfo = namedtuple('FunctionsInfo',
                           'func_count args_count name_lengths')


def get_functions_info(tree, is_ast3=False):
    """Collect simple statistics about the ``FunctionDef`` nodes in *tree*.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` supplies the walker and the
            ``FunctionDef`` class; otherwise ``ast27`` does.

    Returns:
        A ``FunctionsInfo`` named tuple with:

        * ``func_count`` - number of ``FunctionDef`` nodes visited;
        * ``args_count`` - ``len(node.args.args)`` per function, in walk
          order;
        * ``name_lengths`` - ``len(node.name)`` per function, in walk order.
    """
    ast_mod = ast3 if is_ast3 else ast27
    func_def = ast_mod.FunctionDef

    args_count = []
    name_lengths = []
    for node in ast_mod.walk(tree):
        if isinstance(node, func_def):
            args_count.append(len(node.args.args))
            name_lengths.append(len(node.name))

    # Both lists get exactly one entry per function, so their length is
    # the function count.
    return FunctionsInfo(func_count=len(args_count),
                         args_count=args_count,
                         name_lengths=name_lengths)
def get_literals_count(tree, is_ast3=False):
    """Count literal nodes in *tree*.

    A node counts as a literal when it is an instance of ``Str``, ``Num``,
    ``List``, ``Dict``, ``Tuple`` or ``Set`` from the selected module.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` supplies the walker and node classes;
            otherwise ``ast27`` does.

    Returns:
        The number of visited nodes that are instances of one of the six
        literal classes.
    """
    ast_mod = ast3 if is_ast3 else ast27
    literal_types = (
        ast_mod.Str,
        ast_mod.Num,
        ast_mod.List,
        ast_mod.Dict,
        ast_mod.Tuple,
        ast_mod.Set,
    )
    return sum(
        1 for node in ast_mod.walk(tree) if isinstance(node, literal_types)
    )
def get_bigrams_freq(tree, is_ast3=False):
    """Return relative frequencies of (parent class, child class) bigrams.

    For every visited node the children are taken from ``node.body`` if
    that attribute exists, otherwise from ``node.value`` if that exists;
    nodes with neither are ignored. A non-list attribute is treated as a
    single child.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` and ``terms_ast3`` are used; otherwise
            ``ast27`` and ``terms_ast27``.

    Returns:
        A list with one value per ordered pair of terms, in
        ``itertools.product(terms, repeat=2)`` order. Each value is that
        pair's count divided by the number of pairs examined, including
        pairs that are not in the table. When no pair was examined, every
        value is the integer ``0``.
    """
    terms = terms_ast3 if is_ast3 else terms_ast27
    walker = ast3.walk if is_ast3 else ast27.walk

    bigram_count = dict.fromkeys(itertools.product(terms, repeat=2), 0)
    total_count = 0

    for node in walker(tree):
        if hasattr(node, 'body'):
            children = node.body
        elif hasattr(node, 'value'):
            children = node.value
        else:
            continue
        if not isinstance(children, list):
            children = [children]

        parent_class = node.__class__
        for child in children:
            # Every examined pair counts toward the denominator, known or
            # not, matching how the total was always accumulated.
            total_count += 1
            pair = (parent_class, child.__class__)
            # Check each pair on its own. Wrapping the whole loop in
            # try/except KeyError made the first unknown pair drop all
            # remaining children of this node.
            if pair in bigram_count:
                bigram_count[pair] += 1

    if total_count > 0:
        return [count / total_count for count in bigram_count.values()]
    return list(bigram_count.values())
def get_term_frequency(tree, is_ast3=False):
    """Return per-term frequency and presence vectors for *tree*.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` and ``terms_ast3`` are used; otherwise
            ``ast27`` and ``terms_ast27``.

    Returns:
        A tuple ``(frequencies, presence)`` of two lists ordered like the
        term table. ``frequencies`` holds each term's node count divided by
        the number of visited nodes whose class is a term (integer zeros
        when there are none). ``presence`` holds ``1`` for terms seen at
        least once and ``0`` otherwise.
    """
    terms = terms_ast3 if is_ast3 else terms_ast27
    walker = ast3.walk if is_ast3 else ast27.walk

    occurrences = dict.fromkeys(terms, 0)
    for node in walker(tree):
        node_class = node.__class__
        if node_class in occurrences:
            occurrences[node_class] += 1

    matched_total = sum(occurrences.values())
    if matched_total > 0:
        frequencies = [n / matched_total for n in occurrences.values()]
    else:
        frequencies = list(occurrences.values())
    presence = [1 if n else 0 for n in occurrences.values()]
    return frequencies, presence
def get_keywords_count(tree, is_ast3=False):
    """Count how many nodes of *tree* match each keyword table entry.

    An entry matches a node when its ``'ast_class'`` equals the node's
    class and it either has no ``'condition'`` key or its condition
    returns a truthy value for the node. Counts are kept per ``'name'``.

    Args:
        tree: Root node handed to ``ast3.walk`` or ``ast27.walk``.
        is_ast3: When true ``ast3`` and ``keywords_dict3`` are used;
            otherwise ``ast27`` and ``keywords_dict``.

    Returns:
        A list with one count per entry of the keyword table, in table
        order; entries whose name was never matched contribute ``0``.
    """
    keyw_dict = keywords_dict3 if is_ast3 else keywords_dict
    walker = ast3.walk if is_ast3 else ast27.walk

    tally = {}
    for node in walker(tree):
        node_class = node.__class__
        matching = [
            entry for entry in keyw_dict if entry['ast_class'] == node_class
        ]
        for entry in matching:
            if 'condition' in entry and not entry['condition'](node):
                continue
            name = entry['name']
            tally[name] = tally.get(name, 0) + 1

    return [tally.get(entry['name'], 0) for entry in keyw_dict]