def parse_src_files(basefolder, seperate_trees=False, verbose=0):
    """Load and parse every source file below *basefolder*.

    The loader is chosen from the suffix of ``basefolder``:

    * ``"python"`` -- files listed by ``get_ast_src_files`` are parsed with
      ``ast_parse_file``.
    * ``"python_trees"`` -- same listing, parsed with ``parse_ast_tree``.
    * ``"cpp"`` -- files listed by ``get_dot_src_files`` are parsed with
      ``parse_tree``. One file may yield several trees; its label is
      repeated once per tree so samples and labels stay aligned.

    Args:
        basefolder: Dataset directory; only its suffix selects the loader.
        seperate_trees: Forwarded to ``parse_tree`` (``"cpp"`` branch only).
        verbose: When equal to ``1`` the ``"python"`` branch additionally
            calls ``dump(X, y, names)``; ignored by the other branches.

    Returns:
        A tuple ``(X, y, tags, nodes)`` where ``X`` and ``y`` are NumPy
        arrays of parsed trees and labels, ``tags`` is the third value
        returned by the file lister, and ``nodes`` is a fresh ``AstNodes()``
        or ``DotNodes()`` instance. ``None`` is returned when ``basefolder``
        matches none of the known suffixes.
    """
    if basefolder.endswith("python"):
        file_names, labels, problems = get_ast_src_files(basefolder)
        parsed = [ast_parse_file(path) for path in tqdm(file_names)]
        X, y = np.array(parsed), np.array(labels)
        if verbose == 1:
            dump(X, y, file_names)
        return X, y, problems, AstNodes()

    if basefolder.endswith("python_trees"):
        file_names, labels, problems = get_ast_src_files(basefolder)
        parsed = [parse_ast_tree(path) for path in tqdm(file_names)]
        return np.array(parsed), np.array(labels), problems, AstNodes()

    if basefolder.endswith("cpp"):
        file_names, labels, problems = get_dot_src_files(basefolder)
        all_trees = []
        all_labels = []
        for index, path in enumerate(tqdm(file_names)):
            file_trees = parse_tree(path, seperate_trees)
            all_trees.extend(file_trees)
            # Repeat the file's label for every tree extracted from it.
            all_labels.extend([labels[index]] * len(file_trees))
        return np.array(all_trees), np.array(all_labels), problems, DotNodes()

    # Unknown dataset flavour: same implicit result as before, spelled out.
    return None
def traverse1(basefolder):
    """Parse every entry of a flat directory of tree files.

    Each entry of ``basefolder`` is handed to ``parse_tree``; the author
    label is the part of the entry name before its first ``'.'``.

    Args:
        basefolder: Directory whose entries are all parsed.

    Returns:
        ``(trees, users, problems)`` as NumPy arrays. ``problems`` is always
        empty because this loader never records any.
    """
    entries = os.listdir(basefolder)
    parsed = [
        parse_tree(os.path.join(basefolder, entry)) for entry in tqdm(entries)
    ]
    # Text before the first dot, e.g. "Jane_Doe.50.main.tree" -> "Jane_Doe".
    authors = [entry.partition('.')[0] for entry in entries]
    return np.array(parsed), np.array(authors), np.array([])
def get_dot_files2(basefolder, seperate_trees=False):
    """Parse a flat directory of tree files, allowing several trees per file.

    Every entry of ``basefolder`` is parsed with
    ``parse_tree(path, seperate_trees)``; all trees it yields are collected
    and each one is labelled with the part of the entry name before its
    first ``'.'``.

    Args:
        basefolder: Directory whose entries are all parsed.
        seperate_trees: Forwarded unchanged to ``parse_tree``.

    Returns:
        ``(trees, users, problems)`` as NumPy arrays. ``problems`` is always
        empty because this loader never records any.
    """
    parsed = []
    authors = []
    for entry in tqdm(os.listdir(basefolder)):
        file_trees = parse_tree(os.path.join(basefolder, entry), seperate_trees)
        author = entry.partition('.')[0]
        parsed += file_trees
        # One label per extracted tree keeps both sequences aligned.
        authors += [author] * len(file_trees)
    return np.array(parsed), np.array(authors), np.array([])
def get_dot_files(basefolder):
    """Load one parsed ``.tree`` file per submission from a nested layout.

    Expected layout: ``basefolder/<user>/<submission>/<something>.tree``.
    Every sub-directory of ``basefolder`` is treated as a user; every
    sub-directory of a user folder is treated as one submission, from which
    the first ``.tree`` file (in ``os.listdir`` order) is parsed with
    ``parse_tree``.

    Entries that do not fit the layout are skipped instead of aborting the
    whole load: plain files inside a user folder (previously a
    ``NotADirectoryError``) and submission folders without any ``.tree``
    file (previously an ``IndexError``).

    Args:
        basefolder: Root directory of the dataset.

    Returns:
        ``(trees, users, problems)`` as NumPy arrays, where ``users[i]`` is
        the user folder name that ``trees[i]`` was loaded from. ``problems``
        is always empty because this loader never records any.
    """
    trees = []
    users = []
    problems = []  # never populated; kept so the return shape matches sibling loaders
    for folder in os.listdir(basefolder):
        user_dir = os.path.join(basefolder, folder)
        if not os.path.isdir(user_dir):
            continue
        for number in os.listdir(user_dir):
            submission_dir = os.path.join(user_dir, number)
            if not os.path.isdir(submission_dir):
                # Stray file next to the submission folders.
                continue
            tree_files = [
                filename for filename in os.listdir(submission_dir)
                if filename.endswith(".tree")
            ]
            if not tree_files:
                # Submission without a tree dump: nothing to parse.
                continue
            trees.append(parse_tree(os.path.join(submission_dir, tree_files[0])))
            users.append(folder)
    return np.array(trees), np.array(users), np.array(problems)
def traverse(basefolder):
    """Load one parsed ``.tree`` file per submission, printing progress.

    Expected layout: ``basefolder/<user>/<submission>/<something>.tree``.
    For every user folder a progress line of the form
    ``<user>  ... <submission> <submission> ... done.`` is written to
    stdout; from each submission folder the first ``.tree`` file (in
    ``os.listdir`` order) is parsed with ``parse_tree``.

    Entries that do not fit the layout are skipped instead of aborting the
    whole load: plain files inside a user folder (previously a
    ``NotADirectoryError``) and submission folders without any ``.tree``
    file (previously an ``IndexError``). Skipped entries are not echoed in
    the progress line.

    Args:
        basefolder: Root directory of the dataset.

    Returns:
        ``(trees, users, problems)`` as NumPy arrays, where ``users[i]`` is
        the user folder name that ``trees[i]`` was loaded from. ``problems``
        is always empty because this loader never records any.
    """
    trees = []
    users = []
    problems = []  # never populated; kept so the return shape matches sibling loaders
    for folder in os.listdir(basefolder):
        user_dir = os.path.join(basefolder, folder)
        if not os.path.isdir(user_dir):
            continue
        print(folder, " ...", end=" ", flush=True)
        for number in os.listdir(user_dir):
            submission_dir = os.path.join(user_dir, number)
            if not os.path.isdir(submission_dir):
                # Stray file next to the submission folders.
                continue
            tree_files = [
                filename for filename in os.listdir(submission_dir)
                if filename.endswith(".tree")
            ]
            if not tree_files:
                # Submission without a tree dump: nothing to parse.
                continue
            trees.append(parse_tree(os.path.join(submission_dir, tree_files[0])))
            users.append(folder)
            print(number, end=" ", flush=True)
        # NOTE(review): assumed to run once per user folder, closing its progress line.
        print("done.")
    return np.array(trees), np.array(users), np.array(problems)
nodename += ")" # elif hasattr(node,"_content"): # for name, value in node.content(): # field = getattr(node, name) # if type(field) in [int, str, float]: # nodename += str(getattr(node, name)) while len(stack) > 0 and stack[0] >= depth: print(' ' * stack[0] * 2 + "]") stack = stack[1:] stack = [depth] + stack print(' ' * depth * 2 + nodename) if __name__ == "__main__": # filename = os.path.join("..","ast_tree", 'dump_program.py') # ast_tree = ast_parse_file(filename) # # print(list(ast_paths(ast_tree))) # tree_print(ast_tree,callback=print_ast_node_tree) # for s in stack: # print(' ' * s * 2 + "]") filename = os.path.join("..", "dataset", "all_cpp", 'Brian_Harris.50.p_saveg.tree') ast_tree = parse_tree(filename) # print(list(ast_paths(ast_tree))) tree_print(ast_tree[0], callback=print_dot_node) print(max_depth(ast_tree[0])) print(max_branch(ast_tree[0])) # for s in stack: # print(' ' * s * 2 + "]")