def worker(folder, count):
    zf = ZipFile(folder, 'r')
    df_graphs = []
    for f in zf.namelist():
        if f.endswith('.py'):
            file = zf.read(f)
            print count, "Foldername:" + folder, "Filename:" + f
            try:
                df_graph = ASTBuilder(file).build_AST()
                if df_graph is not None:
                    df_json = df_graph.serialize()
                    if int(df_json['count']) > 1:
                        prog_info = {'folder': folder,
                                     'file': f,
                                     'graph': df_json}
                        df_graphs.append(prog_info)
            except:
                print "Error while parsing file:", f
    if df_graphs:
        with open('graphs-zip/graph' + str(count) + '.txt', 'w') as out:
            for graph in df_graphs:
                out.write(json.dumps(graph))
                out.write('\n' + '-' * 60 + '\n')

def worker(folder):
    filename = 'repoData/' + folder + '/allPythonContent.py'
    fullfile = open(filename).read()
    file_splits = fullfile.split('########NEW FILE########')
    df_graphs = {'folder': folder, 'files': []}
    for piece in file_splits:
        piece = piece.strip()
        piece_name = piece.split('\n')[0].strip()
        if len(piece_name.split()) == 3:
            file_name = piece_name.split()[2]
            try:
                print "Foldername:" + folder, "Filename:" + file_name
                df_graph = ASTBuilder(piece).build_AST()
                if df_graph is not None:
                    df_json = df_graph.serialize()
                    if int(df_json['count']) > 1:
                        prog_info = {'file': file_name, 'graph': df_json}
                        df_graphs['files'].append(prog_info)
            except:
                print "Unexpected error in worker:", sys.exc_info()[0]
                f_test = open('srcfiles/test.py', 'w')
                f_test.write(piece)
                f_test.close()
    proc_name = str(os.getpid())
    if df_graphs['files']:
        f = open('graphs/graph-' + proc_name + '.txt', 'a')
        f.write(json.dumps(df_graphs))
        f.write('\n' + '-' * 20 + '\n')
        f.close()

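Both worker variants write one output file per shard or per process, which suggests they are fanned out over the corpus in parallel. A minimal driver sketch for the second variant, assuming a repoData/ directory of checked-out folders (the pool size and layout are assumptions, not from the source):

import multiprocessing
import os

if __name__ == '__main__':
    folders = os.listdir('repoData')  # assumed corpus layout
    pool = multiprocessing.Pool(processes=4)
    pool.map(worker, folders)  # one repo folder per task
    pool.close()
    pool.join()
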
def compile(self, java_syntax_tree):
    print("JAVA -> C++ converter v0.1\n")
    print("Generating AST ..")
    builder = ASTBuilder()
    builder.build_ast(java_syntax_tree)
    builder.start_traverse()

def compile(self, java_syntax_tree):
    print("JAVA -> C++ converter v0.1\n")
    print("Generating AST ..")
    builder = ASTBuilder()
    builder.build_ast(java_syntax_tree)
    out = builder.start_traverse()
    with open("generated.cxx", "w") as handle:
        handle.write(out)

def main(argv):
    inputFile = FileStream(argv[1])
    lexer = CLexer(inputFile)
    stream = CommonTokenStream(lexer)
    parser = CParser(stream)
    tree = parser.prog()
    if parser.getNumberOfSyntaxErrors():
        return

    # Visualise parse tree
    parseTreeDotGen = ParseTreeDotGenerator()
    parseTreeDotGen.generateDOT(parser, tree, "output/parse_tree.gv",
                                render=False)

    # Build AST
    astBuilder = ASTBuilder()
    AST = astBuilder.visit(tree)

    # Semantic validation
    semanticValidator = SemanticValidator()
    AST.accept(semanticValidator)

    # Print errors, if any
    if semanticValidator.errors:
        for error in semanticValidator.errors:
            print("ERROR: " + error)
        return

    # Code optimiser
    optimiser = Optimiser(semanticValidator.symbolTable)
    AST.accept(optimiser)

    # Print warnings, if any
    for warning in optimiser.warnings:
        print("WARNING: " + warning)

    # Visualise AST
    dotGraph = AST.visit(DotGraphBuilder)
    dotGraph.render("output/ast.gv", view=False)

    # Code generator
    if len(argv) >= 3:
        codeGenerator = CodeGenerator(optimiser.symbolTable, argv[2])
    else:
        codeGenerator = CodeGenerator(optimiser.symbolTable)
    AST.accept(codeGenerator)

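The pipeline above is presumably run from the command line; a typical entry point, assuming the guard simply is not shown in the excerpt:

if __name__ == '__main__':
    import sys
    main(sys.argv)
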
def semanticAnalyse(self, file):
    lexer = CLexer(FileStream(
        os.path.dirname(os.path.abspath(__file__)) + "/" + file))
    stream = CommonTokenStream(lexer)
    parser = CParser(stream)
    tree = parser.prog()
    astBuilder = ASTBuilder()
    AST = astBuilder.visit(tree)
    semanticValidator = SemanticValidator()
    AST.accept(semanticValidator)
    return semanticValidator.errors

def get_recommendations(query, fold_no):
    df_graph = None
    source = query.split('\n')
    i = len(source)
    # Parse the longest prefix of the query that builds a data-flow graph
    while not df_graph:
        src_lines = source[:i]
        if src_lines:
            df_graph = ASTBuilder('\n'.join(src_lines)).build_AST()
        i -= 1
    query_obj_types = []
    calls = []
    query_line = re.split(r'[^\w]', source[-1])
    query_obj = filter(None, query_line)[-1]
    assign_nodes, call_nodes = df_graph.find_assignments_and_calls(query_obj)
    for node in assign_nodes:
        query_obj_types.extend(node.src)
    for node in call_nodes:
        calls.append(node.tgt)
    query_count = Counter(calls)
    conn = sqlite3.connect("pyty.db")
    c = conn.cursor()
    objects = []
    for type in query_obj_types:
        results = c.execute(
            '''SELECT obj_calls FROM TRAINSET_{fold}
               WHERE obj_type=?'''.format(fold=fold_no), (type,))
        for obj in results:
            obj_calls = obj[0].split(',')
            score = compute_manhattan_dist(query_count, Counter(obj_calls))
            objects.append((obj_calls, score))
    # Sort neighbours by distance score
    objects = sorted(objects, key=lambda tup: tup[1])
    call_set = Counter()
    for object in objects:
        call_set.update(Counter(object[0]) - query_count)
    recommendations = [call[0] for call in call_set.most_common(10)]
    return recommendations

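A hypothetical usage sketch for get_recommendations: the query is partial source code whose last line ends at the object needing completion, and fold_no selects one of the TRAINSET_* tables. The snippet and fold number here are invented:

query = ("import sqlite3\n"
         "conn = sqlite3.connect('test.db')\n"
         "conn.")
for call in get_recommendations(query, fold_no=1):
    print(call)
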
def semanticAnalyse(self, file):
    lexer = CLexer(FileStream(
        os.path.dirname(os.path.abspath(__file__)) + "/" + file))
    stream = CommonTokenStream(lexer)
    parser = CParser(stream)
    tree = parser.prog()
    astBuilder = ASTBuilder()
    AST = astBuilder.visit(tree)
    oldAST = deepcopy(AST)
    semanticValidator = SemanticValidator()
    AST.accept(semanticValidator)
    optimiser = Optimiser(semanticValidator.symbolTable)
    AST.accept(optimiser)
    return optimiser.warnings, oldAST, AST

def main(argv):
    parser = argparse.ArgumentParser(description='A C to Pcode compiler')
    parser.add_argument('file', help='The C file to be compiled')
    parser.add_argument('-o', '--output',
                        help='Directory to write the compiled file')
    parser.add_argument('-saveast', '--saveast',
                        help='Write the AST to a file', action='store_true')
    parser.add_argument('-showast', '--showast', help='Print the AST',
                        action='store_true')
    parser.add_argument('-n', '--nocompile',
                        help='Disable the compilation phase',
                        action='store_true')
    args = vars(parser.parse_args())

    filepath = os.path.split(args["file"])
    filename = os.path.splitext(filepath[1])[0]
    outputpath = ""
    if args["output"] is not None:
        outputpath += args["output"] + "/"

    symboltable = SymbolTable()
    astBuilder = ASTBuilder(args["file"], symboltable)
    ast = astBuilder.build()
    print(symboltable)

    if not args["nocompile"]:
        # Write the compiled Pcode to file
        compiled = ast.compile()
        with open(outputpath + filename + ".p", "w") as out:
            out.write(compiled)

    # Should we serialize the AST?
    if args["showast"]:
        astBuilder.serialize()
    if args["saveast"]:
        with open(outputpath + filename + ".ast", "w") as out:
            out.write(astBuilder.serialize())

from os import listdir
from os.path import join
from ASTBuilder import ASTBuilder
import sys
from ASTUtils import DEBUG
import pprint
import multiprocessing

src_path = "srcfiles/"
#flist = [f for f in listdir(src_path) if f.endswith(".py")]
flist = ['test.py']
for f in flist:
    try:
        fname = join(src_path, f)
        print("FILENAME:" + f)
        df_graph = ASTBuilder(open(fname).read()).build_AST().serialize()
        if DEBUG:
            print "in ASTDemo"
            pprint.pprint(df_graph)
    except SyntaxError as e:
        print "Syntax error in {0}".format(fname)
    except:
        print "Unexpected error:", sys.exc_info()[0]

def get_recos(query, fold_no, context_features, fname):
    recommendations = []
    df_graph = None
    source = [l for l in query.split('\n') if l != '']

    # Extract the query object from the last line
    last_line = process(source[-1])
    query_line = re.split(r'=|\(|\)|\:|\,|\s*', last_line[-1][:-1])
    query_obj = re.findall(r'([self|\w]+.*)', query_line[-1])[-1]
    query_obj = query_obj.replace('\"', '\'')

    # Build the data-flow graph from the longest compilable prefix of the
    # query, patching unbalanced brackets and dangling compound statements
    # until the source parses
    source = source[:-1] + [source[-1] + "query_method"]
    l = len(source)
    i = l
    try_stack = []
    parenthesis_stack = []
    is_last_loop = True
    count = 0
    while not df_graph:
        # Balance brackets, scanning the line right to left
        for c in source[i - 1][::-1]:
            if c in [')', '}', ']']:
                parenthesis_stack.append(c)
            elif c == '(':
                if not parenthesis_stack or parenthesis_stack[-1] != ')':
                    source[l - 1] = source[l - 1] + ')'
                else:
                    parenthesis_stack.pop()
            elif c == '{':
                if not parenthesis_stack or parenthesis_stack[-1] != '}':
                    source[l - 1] = source[l - 1] + '}'
                else:
                    parenthesis_stack.pop()
            elif c == '[':
                if not parenthesis_stack or parenthesis_stack[-1] != ']':
                    source[l - 1] = source[l - 1] + ']'
                else:
                    parenthesis_stack.pop()
        split_str = source[i - 1].split()
        if split_str and is_last_loop:
            if split_str[-1][-1] == ':':
                is_last_loop = False
        # Close any open try-block with a matching except/pass
        if 'try:' in source[i - 1].strip() and i != l:
            pos = source[i - 1].find('try')
            indent_prefix = source[i - 1][:pos]
            if indent_prefix not in try_stack:
                source = source[:l]
                source.append(indent_prefix + 'except:')
                source.append(indent_prefix + '\t' + 'pass')
                try_stack.append(indent_prefix)
                l = l + 3
            else:
                try_stack.remove(indent_prefix)
        if ('except ' in source[i - 1] or 'except:' in source[i - 1]) \
                and i != l:
            pos = source[i - 1].find('except')
            indent_prefix = source[i - 1][:pos]
            if indent_prefix not in try_stack:
                try_stack.append(source[i - 1][:pos])
        if is_last_loop and len(split_str) > 1:
            if 'if' in split_str[1:] and i == l:
                source[i - 1] += " else ''"
        # Complete a trailing compound statement with a pass body
        for word in keywords:
            if split_str and word == split_str[0]:
                pos = source[i - 1].find(word)
                indent_prefix = source[i - 1][:pos]
                if source[l - 1][-1] != ':':
                    source[l - 1] = source[l - 1] + ':'
                source.append(indent_prefix + '\t' + 'pass')
                if word == 'except':
                    try_stack.append(indent_prefix)
                l = l + 1
                is_last_loop = False
                break
        df_graph = ASTBuilder('\n'.join(source[:l])).build_AST()
        i = i - 1
        count += 1
        if i == 0:
            break
        if count > 500:
            print fname, "INFINITE LOOP"
            break

    # Find nearest neighbours by distance over call/context counts
    if df_graph:
        query_obj_types = []
        query_obj_context = []
        calls = []
        other_calls = []
        sql_query = []
        assign_nodes, call_nodes = \
            df_graph.find_definitions_and_calls(query_obj)
        if assign_nodes:
            for node in assign_nodes:
                query_obj_types.extend(node.src)
                if node.context:
                    for feature in context_features:
                        if feature == 'arg_type':
                            sql_query.append('arg_types')
                            query_obj_context.extend(
                                extract_types(node.context))
                        elif feature == 'arg_value':
                            sql_query.append('arg_values')
                            query_obj_context.extend(
                                process_tokens(node.context))
                        elif feature == 'object_name':
                            sql_query.append('obj_name')
                            query_obj_context.extend(
                                process_obj_name(node.tgt))
        for call_type in call_nodes:
            if call_type == 'object':
                calls.extend(call_nodes[call_type])
            else:
                other_calls.extend(call_nodes[call_type])
        sql_query.append('other_calls')
        sql_query.append('calls')
        query_count = Counter(calls + query_obj_context)
        conn = sqlite3.connect("pyty.db")
        c = conn.cursor()
        objects = []
        for type in query_obj_types:
            sql_select = '''SELECT {attr} FROM TRAINSET_{fold}
                WHERE obj_type=?'''.format(
                attr=','.join(sql_query), fold=fold_no)
            results = c.execute(sql_select, (type,))
            for obj in results:
                obj_count = Counter()
                for j in range(len(obj)):
                    if obj[j]:
                        obj_count += Counter(obj[j].split(','))
                obj_calls = obj[-1].split(',') if obj[-1] else ''
                score = compute_euclidean_dist(query_count, obj_count)
                objects.append((obj_calls, score))
        # Keep the closest objects and recommend their calls the query
        # has not made yet
        objects = sorted(objects, key=lambda tup: tup[1])
        call_set = Counter()
        min_score = ''
        for object in objects:
            if min_score == '':
                min_score = object[1]
            if object[1] == min_score:
                call_set.update(Counter(object[0]) - query_count)
            elif len(call_set) < MAX_RECOS:
                min_score = object[1]
            else:
                break
        recommendations.extend(
            [call[0] for call in call_set.most_common(MAX_RECOS)])
    return recommendations
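
A matching sketch for get_recos: the context feature names are the ones the function checks for, while the query, fold number, and file name are invented here:

features = ['arg_type', 'arg_value', 'object_name']
query = ("import sqlite3\n"
         "conn = sqlite3.connect('pyty.db')\n"
         "c = conn.")
print(get_recos(query, 1, features, 'example.py'))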