def ast_paths(here, path_to_here=()):
    """Yield one accumulated path for every leaf reachable from ``here``.

    Args:
        here: Node to expand; its sub-nodes come from ``children(here)``.
        path_to_here: Path accumulated by the callers so far. The result of
            ``ast_name(here)`` is appended to it with ``+=``.
            NOTE(review): presumably a tuple, matching the ``()`` default --
            confirm what ``ast_name`` returns.

    Yields:
        The accumulated path of each node for which ``children`` yields
        nothing.
    """
    path_to_here += ast_name(here)
    for subtree in children(here):
        # Paths produced by the subtree already start with our own prefix.
        yield from ast_paths(subtree, path_to_here)
    # A node without children terminates a path.
    if not list(children(here)):
        yield path_to_here
def traverse(model, node, label, train_mode): nonlocal curr_timesteps children_ast = list(children(node)) if len(children_ast) == 0: # leaf node curr_timesteps = curr_timesteps + 1 return model.embed_vec(node, train_mode=train_mode) else: # internal node children_nodes = [] for child in children_ast: if child is not None: child_node = traverse(model, child, label, train_mode=train_mode) children_nodes.append(child_node) x = model.embed_vec(node, train_mode=train_mode) new_node = model.merge(x, children_nodes, train_mode=train_mode) curr_timesteps += 1 if curr_timesteps >= bptt_limit: loss = model.loss(new_node, label, train_mode) model.zerograds() loss.backward() optimizer.update() curr_timesteps = 0 return new_node
def ngrams_nodes(x, d, o, ngram=ngram):
    """Count n-grams made of ``x`` followed by runs of consecutive children.

    Each window of ``ngram - 1`` adjacent children of ``x`` is prefixed with
    ``x``; every element is mapped through ``self.astnodes.index`` and the
    resulting tuple is mapped through ``self.astnodes.index`` again to obtain
    the slot of ``o`` that is incremented.

    Args:
        x: Parent node; sub-nodes come from ``children(x)``.
        d: Unused here.
        o: Indexable counter container, updated in place.
        ngram: Total n-gram length including the parent. With ``ngram <= 1``
            no window exists and nothing is counted.
    """
    successors = list(children(x))
    if not successors:
        return

    # zip over progressively shifted copies yields the sliding windows.
    shifted = (successors[offset:] for offset in range(ngram - 1))
    for window in zip(*shifted):
        key = (self.astnodes.index(x),) + tuple(
            self.astnodes.index(node) for node in window
        )
        o[self.astnodes.index(key)] += 1
def ngrams_nodes(x, d, o, ngram=ngram, predecessor=tuple()):
    """Count vertical n-grams (ancestor chains) passing through ``x``.

    While the chain received from the caller is shorter than ``ngram``, ``x``
    is appended to it and every child is visited with the extended chain.
    Whenever a child call hands back a chain of exactly ``ngram`` nodes, the
    nodes are mapped through ``self.astnodes.index``, the resulting tuple is
    mapped again, and that slot of ``o`` is incremented.

    Args:
        x: Current node; sub-nodes come from ``children(x)``.
        d: Unused here.
        o: Indexable counter container, updated in place.
        ngram: Required chain length.
        predecessor: Tuple of ancestors collected so far.

    Returns:
        The chain unchanged when it is already ``ngram`` long, otherwise the
        chain extended with ``x``.
    """
    if len(predecessor) >= ngram:
        # Chain is complete: do not descend any further.
        return predecessor

    predecessor = predecessor + (x,)
    for child in children(x):
        chain = ngrams_nodes(child, d, o, ngram=ngram, predecessor=predecessor)
        if len(chain) != ngram:
            continue
        chain_idx = tuple(self.astnodes.index(node) for node in chain)
        o[self.astnodes.index(chain_idx)] += 1
    return predecessor
def split_trees(X, y, problems):
    """Explode every tree into its direct children, repeating its metadata.

    Args:
        X: Sequence of trees; sub-nodes come from ``children(tree)``.
        y: Per-tree labels, indexed like ``X``.
        problems: Per-tree problem identifiers, indexed like ``X``.

    Returns:
        Three parallel ``numpy`` arrays: the children of all trees, and for
        each child the label and problem of the tree it came from.
    """
    pieces, piece_labels, piece_problems = [], [], []
    for index, tree in enumerate(X):
        for piece in children(tree):
            pieces.append(piece)
            piece_labels.append(y[index])
            piece_problems.append(problems[index])
    return np.array(pieces), np.array(piece_labels), np.array(piece_problems)
def make_binary_tree(src_tree, dst_tree, max_branches=2):
    """Rebuild ``src_tree`` into ``dst_tree`` with a bounded branching factor.

    The first ``max_branches`` children of ``src_tree`` are attached to
    ``dst_tree`` directly. A single remaining child is attached as one extra
    branch; two or more remaining children are pushed below an intermediate
    node (a shallow copy of ``dst_tree``) which is itself restructured with
    the same limit. Finally every child of ``dst_tree`` is replaced by its own
    restructured shallow copy.

    NOTE(review): the index-based write-back assumes ``children(node)`` yields
    the entries of ``node.children`` in order -- confirm against ``children``.

    Args:
        src_tree: Node whose children are redistributed.
        dst_tree: Node receiving the children; its ``children`` list is
            mutated in place.
        max_branches: Number of children attached directly before the
            overflow handling starts. Must be at least 1.

    Returns:
        ``dst_tree``.

    Raises:
        ValueError: If ``max_branches`` is smaller than 1, which could never
            shrink the overflow and would recurse without end.
    """
    if max_branches < 1:
        raise ValueError("max_branches must be >= 1")

    childs = list(children(src_tree))
    if not childs:
        return dst_tree

    dst_tree.children.extend(childs[:max_branches])

    overflow = len(childs) - max_branches
    if overflow == 1:
        dst_tree.children.append(childs[-1])
    elif overflow > 1:
        dst_node = copy.copy(dst_tree)
        dst_node.children = []
        src_node = copy.copy(dst_tree)
        src_node.children = childs[max_branches:]
        # Propagate the caller's limit: previously this call fell back to the
        # default of 2, ignoring any other ``max_branches``.
        dst_tree.children.append(
            make_binary_tree(src_node, dst_node, max_branches))

    for idx, child in enumerate(children(dst_tree)):
        dst_child = copy.copy(child)
        dst_child.children = []
        dst_tree.children[idx] = make_binary_tree(
            child, dst_child, max_branches)
    return dst_tree
def ngrams_nodes(x, d, o, ngram=ngram,v_skip=v_skip,predecessor=tuple()):
    """Count vertical skip-n-grams (ancestor chains) passing through ``x``.

    While the chain received from the caller is shorter than
    ``ngram + v_skip``, ``x`` is appended and every child is visited with the
    extended chain. The chain handed back by each child is thinned by keeping
    every ``v_skip + 1``-th node; if exactly ``ngram`` nodes remain they are
    mapped through ``self.astnodes.index``, the resulting tuple is mapped
    again, and that slot of ``o`` is incremented.

    Args:
        x: Current node; sub-nodes come from ``children(x)``.
        d: Unused here.
        o: Indexable counter container, updated in place.
        ngram: Number of nodes in a counted n-gram.
        v_skip: Number of chain nodes skipped between two kept nodes.
        predecessor: Tuple of ancestors collected so far.

    Returns:
        The chain unchanged when it already holds ``ngram + v_skip`` nodes,
        otherwise the chain extended with ``x``.
    """
    if len(predecessor) >= ngram + v_skip:
        # Chain is long enough: do not descend any further.
        return predecessor

    predecessor = predecessor + (x,)
    stride = v_skip + 1
    for child in children(x):
        chain = ngrams_nodes(
            child, d, o, ngram=ngram, v_skip=v_skip, predecessor=predecessor
        )
        kept = chain[::stride]
        if len(kept) != ngram:
            continue
        kept_idx = tuple(self.astnodes.index(node) for node in kept)
        o[self.astnodes.index(kept_idx)] += 1
    return predecessor
def traverse_rec(self, node, train_mode):
    """Recursively reduce the subtree rooted at ``node`` to a single result.

    Args:
        node: Tree node to process; sub-nodes come from ``children(node)``.
        train_mode: Flag forwarded to ``leaf``, ``embed_vec`` and ``merge``.

    Returns:
        ``self.leaf(node)`` when the node has no children, otherwise
        ``self.merge`` applied to the node embedding and the list of results
        obtained for its children.
    """
    subtrees = list(children(node))
    if not subtrees:
        return self.leaf(node, train_mode=train_mode)

    # Children first (in order), then the node itself.
    merged_children = [
        self.traverse_rec(child, train_mode=train_mode) for child in subtrees
    ]
    embedded = self.embed_vec(node, train_mode=train_mode)
    return self.merge(embedded, merged_children, train_mode=train_mode)
def split_trees2(X, y, problems, original=False):
    """Split every tree into functions, classes, an import module and the rest.

    For each tree the direct children are grouped by node type name. Every
    ``FunctionDef`` and ``ClassDef`` becomes a sample of its own; all
    ``Import``/``ImportFrom`` nodes are gathered in one copy of the tree and
    all remaining nodes in another copy. Each produced sample inherits the
    label and the problem of the tree it came from, so the three returned
    arrays stay aligned.

    The input trees are left as they were: ``tree.body`` is only emptied
    temporarily so that the deep copies do not duplicate the statements.

    Args:
        X: Sequence of trees; sub-nodes come from ``children(tree)``.
        y: Per-tree labels, indexed like ``X``.
        problems: Per-tree problem identifiers, indexed like ``X``.
        original: When true, the unsplit inputs are kept at the front of the
            returned arrays.

    Returns:
        Three parallel ``numpy`` arrays: samples, labels and problems.
    """
    subX = list(X) if original else []
    subY = list(y) if original else []
    subProblem = list(problems) if original else []

    for i, tree in enumerate(X):
        functions = []
        classes = []
        imports = []
        global_code = []
        for child in children(tree):
            child_name = type(child).__name__
            if child_name == "FunctionDef":
                functions.append(child)
            elif child_name == "ClassDef":
                classes.append(child)
            elif child_name in ("ImportFrom", "Import"):
                imports.append(child)
            else:
                global_code.append(child)

        # Copy the tree shell without its statements, then put the caller's
        # body back (previously the input tree was left with an empty body,
        # which also emptied the originals returned with ``original=True``).
        had_body = hasattr(tree, "body")
        saved_body = tree.body if had_body else None
        tree.body = []
        try:
            import_module = copy.deepcopy(tree)
            global_module = copy.deepcopy(tree)
        finally:
            if had_body:
                tree.body = saved_body
            else:
                del tree.body
        import_module.body = imports
        global_module.body = global_code

        pieces = functions + classes + [import_module, global_module]
        subX.extend(pieces)
        subY.extend([y[i]] * len(pieces))
        # One problem entry per piece (previously one per tree, which left
        # the problems array shorter than the samples and labels).
        subProblem.extend([problems[i]] * len(pieces))

    return np.array(subX), np.array(subY), np.array(subProblem)
def traverse_rec(self, node, train_mode):
    """Recursively reduce ``node`` using exactly ``self.n_children`` inputs.

    All children are processed first. If fewer than ``self.n_children``
    results are available, the list is padded with the pair returned by
    ``self.leaf(None, train_mode)``; if there are more, only the first
    ``self.n_children`` results are kept.

    Args:
        node: Tree node to process; sub-nodes come from ``children(node)``.
        train_mode: Flag forwarded to ``leaf``, ``embed_vec`` and ``merge``.

    Returns:
        ``self.leaf(node)`` when the node has no children, otherwise
        ``self.merge`` applied to the node embedding and the fixed-size list
        of children results.
    """
    subtrees = list(children(node))
    if not subtrees:
        return self.leaf(node, train_mode=train_mode)

    child_states = [
        self.traverse_rec(child, train_mode=train_mode) for child in subtrees
    ]

    missing = self.n_children - len(child_states)
    if missing > 0:
        # Pad with the result obtained for an absent (``None``) node.
        c, h = self.leaf(None, train_mode)
        child_states.extend((c, h) for _ in range(missing))
    elif missing < 0:
        # Surplus children were still evaluated above; only the results of
        # the leading ones are kept.
        child_states = child_states[:self.n_children]

    embedded = self.embed_vec(node, train_mode=train_mode)
    return self.merge(embedded, child_states, train_mode=train_mode)
def avg_branch_lambda(x, d, o):
    """Record the number of children of ``x`` when it has at least one.

    Args:
        x: Node to inspect; sub-nodes come from ``children(x)``.
        d: Unused here.
        o: List collecting the child counts, appended to in place.
    """
    branches = sum(1 for _ in children(x))
    if branches > 0:
        o.append(branches)
def max_branch_lambda(x, d, o):
    """Keep the largest child count seen so far in ``o[0]``.

    Args:
        x: Node to inspect; sub-nodes come from ``children(x)``.
        d: Unused here.
        o: Accumulator updated in place; it is seeded with the first count and
            afterwards only overwritten by a strictly larger one.
    """
    branches = sum(1 for _ in children(x))
    if len(o) == 0:
        # First visited node seeds the maximum.
        o.append(branches)
        return
    if branches > o[0]:
        o[0] = branches
def nodes_links_lambda(x, d, o):
    """Append, for every child of ``x``, a link from ``x`` to that child.

    Nodes are identified by element ``[1]`` of their ``node_ids`` entry,
    looked up through the node's ``uuid``.

    Args:
        x: Parent node; sub-nodes come from ``children(x)``.
        d: Unused here.
        o: Mapping/sequence of lists indexed by the parent's identifier,
            updated in place.

    Returns:
        ``o``.
    """
    for child in children(x):
        # Look the parent's bucket up per child, so a node without children
        # never touches ``o`` or ``node_ids``.
        targets = o[node_ids[x.uuid][1]]
        targets.append(node_ids[child.uuid][1])
    return o
def nuke_all_strings(mod):
    """Replace every string literal in the tree under ``mod`` with ``''``.

    The tree is modified in place and walked through ``children``.

    String literals are ``ast.Constant`` nodes holding a ``str`` on
    Python 3.8+; ``ast.Str`` and the ``.s`` alias are deprecated there and
    removed in Python 3.14, so they are only used on older interpreters.

    Args:
        mod: Root node of the (sub)tree to clean.
    """
    import sys

    if sys.version_info >= (3, 8):
        if isinstance(mod, ast.Constant) and isinstance(mod.value, str):
            mod.value = ''
    elif isinstance(mod, ast.Str):
        mod.s = ''

    for child in children(mod):
        nuke_all_strings(child)
def convert_tree(src_tree):
    """Attach an explicit ``children`` list to every node of the tree.

    Each node's sub-nodes (as produced by ``children``) are converted first
    and then stored, in order, on the node's ``children`` attribute.

    Args:
        src_tree: Root of the tree; modified in place.

    Returns:
        ``src_tree``.
    """
    src_tree.children = [convert_tree(node) for node in children(src_tree)]
    return src_tree