def nodes_links(node_ids, tree):
    """Build a mapping from each node's integer id to the ids of its children.

    ``node_ids`` is indexed by a node's ``uuid``; the second element of each
    entry is used as that node's id.  ``bfs`` is run over ``tree`` in "all"
    mode and, for every node passed to the callback, the ids of the nodes
    returned by ``children`` are appended to the list stored under the
    node's own id.

    Returns the ``defaultdict(list)`` that was filled in; nodes without
    children get no entry.
    """

    def collect_child_ids(x, d, o):
        # Resolve every child first; only touch the mapping when there is
        # something to store, so childless nodes never create an entry.
        child_ids = [node_ids[child.uuid][1] for child in children(x)]
        if child_ids:
            o[node_ids[x.uuid][1]].extend(child_ids)
        return o

    adjacency = defaultdict(list)
    bfs(tree, callback=collect_child_ids, mode="all", out=adjacency)
    return adjacency
def nodes_ids(tree):
    """Assign a sequential integer id to every node the traversal visits.

    ``bfs`` is run over ``tree`` in "all" mode.  Each node passed to the
    callback is stored under its ``uuid`` as a ``(node, id)`` pair, where
    ids start at 0 and grow by one per callback invocation.

    The counter is local to this call (it used to be a module-level
    global), so nested or concurrent calls cannot disturb each other and no
    name leaks into the module namespace.

    Returns the ``dict`` mapping ``uuid -> (node, id)``.
    """
    from itertools import count

    next_id = count()  # yields 0, 1, 2, ... in callback order

    def assign_id(x, d, o):
        o[x.uuid] = (x, next(next_id))
        return o

    ids = {}
    bfs(tree, callback=assign_id, mode="all", out=ids)
    return ids
def avg_branch(ast_tree):
    """Return the mean number of children over nodes that have children.

    ``bfs`` is run over ``ast_tree`` in "all" mode; for each node passed to
    the callback the number of items produced by ``children`` is recorded
    when it is greater than zero.  The mean of the recorded counts is
    truncated to ``int``.

    Returns 0 when no node has any children, instead of failing on the
    NaN that ``np.mean`` produces for an empty list.
    """

    def record_fanout(x, d, o):
        fanout = len(list(children(x)))
        if fanout > 0:
            o.append(fanout)

    fanouts = bfs(ast_tree, callback=record_fanout, mode="all", out=[])
    # np.mean([]) is NaN and int(NaN) raises ValueError, so handle the
    # "no branching node" case explicitly.
    if len(fanouts) == 0:
        return 0
    return int(np.mean(fanouts))
def tf_node_leaves(self, ast_tree):
    """Count occurrences of each node index seen by a "leaves"-mode traversal.

    ``bfs`` is run over ``ast_tree`` with ``mode="leaves"`` (presumably
    restricting the callback to leaf nodes -- confirm against ``bfs``).
    Every node passed to the callback is translated with
    ``self.astnodes.index`` and the counter for that index is incremented.

    Returns whatever ``bfs`` returns for the ``defaultdict(int)`` of
    ``index -> count`` handed to it as ``out``.
    """

    def bump(x, d, o):
        node_idx = self.astnodes.index(x)
        o[node_idx] = o[node_idx] + 1

    counts = defaultdict(int)
    return bfs(ast_tree, callback=bump, mode="leaves", out=counts)
def max_depth(ast_tree):
    """Return the largest depth value reported by ``bfs`` over ``ast_tree``.

    The traversal runs in "all" mode with a one-element list as
    accumulator: the first depth seen is stored, and later depths replace
    it only when strictly greater.
    """

    def keep_deepest(x, d, o):
        if not o:
            # First node seen: seed the running maximum.
            o.append(d)
            return
        if d > o[0]:
            o[0] = d

    deepest = bfs(ast_tree, callback=keep_deepest, mode="all", out=[])
    return deepest[0]
def ngrams_node_fast(self, ast_tree, ngram=2):
    """Collect the ``self.astnodes`` index of each node of a "leaves"-mode traversal.

    ``bfs`` is run over ``ast_tree`` with ``mode="leaves"``; every node
    passed to the callback is translated with ``self.astnodes.index`` and
    appended to a list, in callback order.

    ``ngram`` is accepted but not used by this implementation.

    Returns whatever ``bfs`` returns for the list handed to it as ``out``.
    """

    def append_index(x, d, o):
        o.append(self.astnodes.index(x))

    collected = []
    return bfs(ast_tree, callback=append_index, mode="leaves", out=collected)
def max_branch(ast_tree):
    """Return the largest child count among the nodes ``bfs`` visits.

    The traversal runs in "all" mode.  For each node passed to the callback
    the number of items produced by ``children`` is computed; a one-element
    list holds the running maximum, replaced only by a strictly greater
    count.
    """

    def keep_widest(x, d, o):
        fanout = len(list(children(x)))
        if not o:
            # First node seen: seed the running maximum.
            o.append(fanout)
            return
        if fanout > o[0]:
            o[0] = fanout

    widest = bfs(ast_tree, callback=keep_widest, mode="all", out=[])
    return widest[0]
def tf_ngrams_node(self, ast_tree, ngram=2):
    """Count parent-plus-consecutive-children n-grams over ``ast_tree``.

    ``bfs`` is run in "all" mode.  For every node with at least one child,
    each run of ``ngram - 1`` consecutive children is combined with the
    node itself into a tuple of ``self.astnodes.index`` values (parent
    first).  That tuple is in turn passed to ``self.astnodes.index`` and
    the counter stored under the result is incremented.

    Returns whatever ``bfs`` returns for the ``defaultdict(int)`` handed to
    it as ``out``.
    """

    def count_sibling_grams(x, d, o, ngram=ngram):
        kids = list(children(x))
        if not kids:
            return
        # Shifted copies of the child list; zipping them yields every
        # window of ngram - 1 adjacent children.
        shifted = [kids[offset:] for offset in range(ngram - 1)]
        for window in zip(*shifted):
            key = (self.astnodes.index(x),) + tuple(
                self.astnodes.index(node) for node in window
            )
            o[self.astnodes.index(key)] += 1

    counts = defaultdict(int)
    return bfs(ast_tree, callback=count_sibling_grams, mode="all", out=counts)
def avg_node_leaves(self, ast_tree):
    """Average the depth reported for each node index in a "leaves"-mode traversal.

    ``bfs`` is run over ``ast_tree`` with ``mode="leaves"``.  For every
    node passed to the callback, a ``[depth_sum, hits]`` pair stored under
    ``self.astnodes.index(node)`` is updated with the node's depth.

    Returns a ``dict`` mapping each index to ``depth_sum / hits``.
    """

    def accumulate(x, d, o):
        totals = o[self.astnodes.index(x)]
        if not totals:
            # Fresh entry from the defaultdict: initialise sum and count.
            totals.extend([0, 0])
        totals[0] += d
        totals[1] += 1

    sums = bfs(ast_tree, callback=accumulate, mode="leaves", out=defaultdict(list))

    averages = {}
    for key, (depth_sum, hits) in sums.items():
        averages[key] = depth_sum / hits
    return averages
def tf_ngrams_node_fast(self, ast_tree, ngram=2):
    """Count n-grams of nodes taken along parent-to-child paths.

    ``bfs`` is run over ``ast_tree`` in "all" mode with a recursive
    callback.  The callback grows a tuple of nodes (``predecessor``) by
    descending through ``children`` until it holds ``ngram`` nodes.  Each
    time a recursive call returns a tuple of exactly ``ngram`` nodes, the
    nodes are translated with ``self.astnodes.index``, the resulting tuple
    is itself passed to ``self.astnodes.index``, and the counter stored
    under that value is incremented.

    Returns whatever ``bfs`` returns for the ``defaultdict(int)`` handed to
    it as ``out``.
    """
    out = defaultdict(int)

    def ngrams_nodes(x, d, o, ngram=ngram, predecessor=tuple()):
        # ``predecessor`` is the path of nodes collected so far; it is
        # empty when ``bfs`` invokes the callback and non-empty only on the
        # recursive calls below.
        # NOTE(review): the nesting of the child loop under this length
        # check should be confirmed against the canonical formatting.
        if len(predecessor) < ngram:
            predecessor = predecessor + (x,) # (type(x).__name__,)
            for child in children(x):
                # The recursive call returns the (possibly extended) path.
                grams = ngrams_nodes(child, d, o, ngram=ngram, predecessor=predecessor)
                if len(grams) == ngram:
                    grams_idx = tuple(self.astnodes.index(gram) for gram in grams)
                    o[self.astnodes.index(grams_idx)] += 1
        return predecessor

    return bfs(ast_tree, callback=ngrams_nodes, mode="all", out=out)
def convert_src_files(basefolder):
    """Parse the source files found under ``basefolder`` and dump each tree to disk.

    File names come from ``get_ast_src_files`` and each one is parsed with
    ``ast_parse_file``.  Every parsed tree is passed through
    ``unify_children`` and written to
    ``../dataset/python_trees/<source file stem>.tree`` with:

    * one line per node: ``>``, the node id, a tab, the node's type name;
    * one line per node that has children: ``<``, the node id, ``=``, and
      the comma-separated ids of its children.

    Returns ``None``.
    """
    X_names, y, problems = get_ast_src_files(basefolder)
    X = np.array([ast_parse_file(src_path) for src_path in tqdm(X_names)])
    y = np.array(y)
    tags = problems

    for src_path, tree in zip(X_names, X):
        stem = os.path.splitext(os.path.basename(src_path))[0]
        tree = unify_children(tree)
        tree_path = os.path.join("..", "dataset", "python_trees", stem + ".tree")

        with open(tree_path, "+w") as file:
            # Node count via a separate traversal; the value is not used
            # further in this function.
            cc = []
            coun = len(bfs(tree, callback=count_nodes, out=cc))

            node_ids = nodes_ids(tree)
            node_links = nodes_links(node_ids, tree)

            for node, node_id in node_ids.values():
                file.write(">{0}\t{1}\n".format(node_id, type(node).__name__))

            for parent_id, child_ids in node_links.items():
                joined = ",".join(map(str, child_ids))
                file.write("<{0}={1}\n".format(str(parent_id), joined))
def tf_skip_grams_node_fast(self, ast_tree, ngram=2,v_skip=0):
    """Count skip-grams of nodes taken along parent-to-child paths.

    ``bfs`` is run over ``ast_tree`` in "all" mode with a recursive
    callback.  The callback grows a tuple of nodes (``predecessor``) by
    descending through ``children`` until it holds ``ngram + v_skip``
    nodes.  From each tuple returned by a recursive call, every
    ``(v_skip + 1)``-th node is kept (starting with the first).  When
    exactly ``ngram`` nodes remain, they are translated with
    ``self.astnodes.index``, the resulting tuple is itself passed to
    ``self.astnodes.index``, and the counter stored under that value is
    incremented.

    Returns whatever ``bfs`` returns for the ``defaultdict(int)`` handed to
    it as ``out``.
    """
    out = defaultdict(int)

    def ngrams_nodes(x, d, o, ngram=ngram,v_skip=v_skip,predecessor=tuple()):
        # ``predecessor`` is the path of nodes collected so far; it is
        # empty when ``bfs`` invokes the callback and non-empty only on the
        # recursive calls below.
        # NOTE(review): the nesting of the child loop under this length
        # check should be confirmed against the canonical formatting.
        if len(predecessor) < ngram+v_skip:
            predecessor = predecessor + (x,) # (type(x).__name__,)
            for child in children(x):
                # The recursive call returns the (possibly extended) path.
                grams = ngrams_nodes(child, d, o, ngram=ngram,v_skip=v_skip,predecessor=predecessor)
                # Keep every (v_skip + 1)-th node of the path.
                grams = grams[::v_skip+1]
                if len(grams) == ngram:
                    grams_idx = tuple(self.astnodes.index(gram) for gram in grams)
                    # grams_idx = tuple(type(gram).__name__ for gram in grams)
                    o[self.astnodes.index(grams_idx)] += 1
                    # o[grams_idx] += 1
        return predecessor

    return bfs(ast_tree, callback=ngrams_nodes, mode="all", out=out)
def avg_depth(ast_tree):
    """Return the mean depth over all nodes ``bfs`` visits, truncated to ``int``.

    The traversal runs over ``ast_tree`` in "all" mode and the depth passed
    to the callback is recorded for every node.
    """

    def record_depth(x, d, o):
        o.append(d)

    depths = bfs(ast_tree, callback=record_depth, mode="all", out=[])
    mean_depth = np.mean(depths)
    return int(mean_depth)