Example #1
def worker(folder, count):
    zf=ZipFile(folder,'r')
    df_graphs=[]
    for f in zf.namelist():
        if f.endswith('.py'):
            file=zf.read(f)
            print count, "Foldername:"+folder, \
                "Filename:"+f
            try:
                df_graph = ASTBuilder(file).build_AST()
                if df_graph is not None:
                    df_json=df_graph.serialize()
                    if int(df_json['count'])>1:
                        prog_info={
                            'folder':folder,
                            'file':f,
                            'graph':df_json}
                        df_graphs.append(prog_info)
            except:
                print "Error while parsing file:",f
                pass

    if df_graphs:
        with open('graphs-zip/graph'+str(count)+'.txt','w') as f:
            for graph in df_graphs:
                f.write(json.dumps(graph))
                f.write('\n'+'-'*60+'\n')
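A possible driver for the worker above (a sketch, not taken from the source: the zip directory name and the use of multiprocessing.Pool are assumptions):

import os
from multiprocessing import Pool

def run_workers(zip_dir='zips/'):
    # Hand each .zip archive to worker() together with an index, so every
    # task writes its own graphs-zip/graph<count>.txt file.
    archives = [os.path.join(zip_dir, z) for z in sorted(os.listdir(zip_dir))
                if z.endswith('.zip')]
    pool = Pool()
    for count, path in enumerate(archives):
        pool.apply_async(worker, (path, count))
    pool.close()
    pool.join()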
Example #2
def worker(folder):
    filename = 'repoData/' + folder + '/allPythonContent.py'
    fullfile = open(filename).read()
    file_splits = fullfile.split('########NEW FILE########')
    df_graphs = {'folder': folder, 'files': []}
    for piece in file_splits:
        piece = piece.strip()
        piece_name = piece.split('\n')[0].strip()
        if len(piece_name.split()) == 3:
            file_name = piece_name.split()[2]
            try:
                print "Foldername:" + folder, "Filename:" + file_name
                df_graph = ASTBuilder(piece).build_AST()
                if df_graph is not None:
                    df_json = df_graph.serialize()
                    if int(df_json['count']) > 1:
                        prog_info = {'file': file_name, 'graph': df_json}
                        df_graphs['files'].append(prog_info)
            except:
                print "Unexpected error in worker:", sys.exc_info()[0]
                f_test = open('srcfiles/test.py', 'w')
                f_test.write(piece)
                f_test.close()

    proc_name = str(os.getpid())
    if df_graphs['files']:
        f = open('graphs/graph-' + proc_name + '.txt', 'a')
        f.write(json.dumps(df_graphs))
        f.write('\n' + '-' * 20 + '\n')
        f.close()
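This variant of the worker could be dispatched like the following sketch (again an assumption, not from the source; the repoData/ listing and Pool.map are illustrative). Each process appends to its own graphs/graph-<pid>.txt, so tasks do not need an explicit index:

import os
from multiprocessing import Pool

if __name__ == '__main__':
    folders = [d for d in sorted(os.listdir('repoData'))
               if os.path.isdir(os.path.join('repoData', d))]
    Pool().map(worker, folders)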
Example #3
    def compile(self, java_syntax_tree):
        print("JAVA -> C++ converter v0.1\n")
        print("Generating AST ..")

        builder = ASTBuilder()
        builder.build_ast(java_syntax_tree)
        
        builder.start_traverse()
Example #4
    def compile(self, java_syntax_tree):
        print("JAVA -> C++ converter v0.1\n")
        print("Generating AST ..")

        builder = ASTBuilder()
        builder.build_ast(java_syntax_tree)
        
        out = builder.start_traverse()

        with open("generated.cxx", "w") as handle:
            handle.write(out)
Example #5
def main(argv):

    inputFile = FileStream(argv[1])
    lexer = CLexer(inputFile)
    stream = CommonTokenStream(lexer)
    parser = CParser(stream)
    tree = parser.prog()

    if parser.getNumberOfSyntaxErrors():
        return

    # Visualise parse tree
    parseTreeDotGen = ParseTreeDotGenerator()
    parseTreeDotGen.generateDOT(parser,
                                tree,
                                "output/parse_tree.gv",
                                render=False)

    # Build AST
    astBuilder = ASTBuilder()
    AST = astBuilder.visit(tree)

    # Semantic Validation
    semanticValidator = SemanticValidator()
    AST.accept(semanticValidator)

    # Print errors, if any
    if semanticValidator.errors:
        for error in semanticValidator.errors:
            print("ERROR: " + error)
        return

    # Code optimiser
    optimiser = Optimiser(semanticValidator.symbolTable)
    AST.accept(optimiser)

    # Print warnings, if any
    if optimiser.warnings:
        for warning in optimiser.warnings:
            print("WARNING: " + warning)

    # Visualise AST
    dotGraph = AST.visit(DotGraphBuilder)
    dotGraph.render("output/ast.gv", view=False)

    # Code generator
    codeGenerator = None
    if 2 <= len(argv) - 1:
        codeGenerator = CodeGenerator(optimiser.symbolTable, argv[2])
    else:
        codeGenerator = CodeGenerator(optimiser.symbolTable)
    AST.accept(codeGenerator)
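main() expects the C source file as argv[1] and, optionally, an output target as argv[2]; a typical entry point (an assumption, it is not shown in the source) would be:

if __name__ == '__main__':
    import sys
    main(sys.argv)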
Example #6
    def semanticAnalyse(self, file):
        lexer = CLexer(FileStream(os.path.dirname(os.path.abspath(__file__)) + "/" + file))
        stream = CommonTokenStream(lexer)
        parser = CParser(stream)
        tree = parser.prog()

        astBuilder = ASTBuilder()
        AST = astBuilder.visit(tree)

        semanticValidator = SemanticValidator()
        AST.accept(semanticValidator)

        return semanticValidator.errors
Example #7
def get_recommendations(query, fold_no):
    df_graph=None
    source=query.split('\n')
    i=len(source)
    while not df_graph:
        src_lines=source[:i]
        if src_lines:
            df_graph=ASTBuilder('\n'.join(source[:i])).build_AST()
            i-=1

    query_obj_types=[]
    calls=[]
    query_line=re.split(r'[^\w]',source[-1])
    query_obj=filter(None, query_line)[-1]
    assign_nodes, call_nodes= df_graph.find_assignments_and_calls(query_obj)
    for node in assign_nodes:
        query_obj_types.extend(node.src)

    for node in call_nodes:
        calls.append(node.tgt)

    query_count=Counter(calls)

    conn=sqlite3.connect("pyty.db")
    c=conn.cursor()
    objects=[]


    for type in query_obj_types:

        results=c.execute('''SELECT obj_calls FROM TRAINSET_{fold} WHERE obj_type=?'''.
                          format(fold=fold_no),(type,))
        for obj in results:
            obj_calls=obj[0].split(',')
            score=compute_manhattan_dist(query_count,
                                   Counter(obj_calls))
            objects.append((obj_calls, score))

    # sort candidate objects by their distance score (tup[1]), nearest first
    objects=sorted(objects, key=lambda tup: tup[1])

    call_set=Counter()
    for object in objects:
        call_set.update(Counter(object[0])-query_count)

    recommendations=[call[0] for call in call_set.most_common(10)]
    return recommendations
Example #8
def get_recommendations(query, fold_no):
    df_graph = None
    source = query.split('\n')
    i = len(source)
    while not df_graph:
        src_lines = source[:i]
        if src_lines:
            df_graph = ASTBuilder('\n'.join(source[:i])).build_AST()
            i -= 1

    query_obj_types = []
    calls = []
    query_line = re.split(r'[^\w]', source[-1])
    query_obj = filter(None, query_line)[-1]
    assign_nodes, call_nodes = df_graph.find_assignments_and_calls(query_obj)
    for node in assign_nodes:
        query_obj_types.extend(node.src)

    for node in call_nodes:
        calls.append(node.tgt)

    query_count = Counter(calls)

    conn = sqlite3.connect("pyty.db")
    c = conn.cursor()
    objects = []

    for type in query_obj_types:

        results = c.execute(
            '''SELECT obj_calls FROM TRAINSET_{fold} WHERE obj_type=?'''.
            format(fold=fold_no), (type, ))
        for obj in results:
            obj_calls = obj[0].split(',')
            score = compute_manhattan_dist(query_count, Counter(obj_calls))
            objects.append((obj_calls, score))

    # sort candidate objects by their distance score (tup[1]), nearest first
    objects = sorted(objects, key=lambda tup: tup[1])

    call_set = Counter()
    for object in objects:
        call_set.update(Counter(object[0]) - query_count)

    recommendations = [call[0] for call in call_set.most_common(10)]
    return recommendations
Example #9
    def semanticAnalyse(self, file):
        lexer = CLexer(
            FileStream(
                os.path.dirname(os.path.abspath(__file__)) + "/" + file))
        stream = CommonTokenStream(lexer)
        parser = CParser(stream)
        tree = parser.prog()

        astBuilder = ASTBuilder()
        AST = astBuilder.visit(tree)
        oldAST = deepcopy(AST)

        semanticValidator = SemanticValidator()
        AST.accept(semanticValidator)

        optimiser = Optimiser(semanticValidator.symbolTable)
        AST.accept(optimiser)

        return optimiser.warnings, oldAST, AST
Example #10
def main(argv):
    parser = argparse.ArgumentParser(description='A C to Pcode compiler')
    parser.add_argument('file', help='The c file to be compiled')
    parser.add_argument('-o',
                        '--output',
                        help='Directory to write compiled C file')
    parser.add_argument('-saveast',
                        '--saveast',
                        help='Write the AST to a file',
                        action='store_true')
    parser.add_argument('-showast',
                        '--showast',
                        help='Print AST',
                        action='store_true')
    parser.add_argument('-n',
                        '--nocompile',
                        help='Disable the compilation phase',
                        action='store_true')
    args = vars(parser.parse_args())

    filepath = os.path.split(args["file"])
    filename = os.path.splitext(filepath[1])[0]
    outputpath = ""
    if (args["output"] != None):
        outputpath += args["output"] + "/"

    symboltable = SymbolTable()
    astBuilder = ASTBuilder(args["file"], symboltable)
    ast = astBuilder.build()

    print(symboltable)

    if (bool(args["nocompile"]) == False):
        compiled = ast.compile()

        # Write to file
        file = open(outputpath + filename + ".p", "w")
        file.write(compiled)
        file.close()

    # Should we serialize
    if (args["showast"] == True):
        astBuilder.serialize()

    if (args["saveast"] == True):
        file = open(outputpath + filename + ".ast", "w")
        file.write(astBuilder.serialize())
        file.close()
Example #11
from os import listdir
from os.path import join
from ASTBuilder import ASTBuilder
import sys
from ASTUtils import DEBUG
import pprint
import multiprocessing

src_path = "srcfiles/"
#flist=[f for f in listdir(src_path) if f.endswith(".py")]
flist = ['test.py']
for f in flist:
    try:
        fname = join(src_path, f)
        print("FILENAME:" + f)

        df_graph=ASTBuilder(open(fname).read()).\
            build_AST().serialize()

        if DEBUG:
            print "in ASTDemo"
            print pprint.pprint(df_graph)

    except SyntaxError as e:
        print "Syntax error in {0}".format(fname)
        pass
    except:
        print "Unexpected error:", sys.exc_info()[0]
        pass
Example #12
def get_recos(query, fold_no, context_features, fname):
    recommendations=[]
    df_graph=None
    source=[l for l in query.split('\n') if l!='']
    #source=source[:-1]+process(source[-1])
    """Extract the Query Object"""
    last_line=process(source[-1])
    query_line=re.split('=|\(|\)|\:|\,|\\s*',last_line[-1][:-1])

    query_obj=re.findall(r'([self|\w]+.*)',query_line[-1])[-1]
    query_obj=query_obj.replace('\"','\'')
    #print fname, "query_obj", query_obj
    """Get the data flow graph using the least compilable code in the query"""
    source=source[:-1]+[source[-1]+"query_method"]
    l=len(source)
    i=l
    try_stack=[]
    parenthesis_stack=[]
    is_last_loop=True
    count=0
    while not df_graph:
        for c in source[i-1][::-1]:
            if c in [')','}',']']:
                parenthesis_stack.append(c)
            elif c=='(':
                if not parenthesis_stack or parenthesis_stack[-1]!=')':
                    source[l-1]=source[l-1]+')'
                    #parenthesis_stack.append('(')
                    #i=l
                else:
                    parenthesis_stack.pop()
            elif c=='{':
                if not parenthesis_stack or parenthesis_stack[-1]!='}':
                    source[l-1]=source[l-1]+'}'
                    #parenthesis_stack.append('{')
                    #i=l
                else:
                    parenthesis_stack.pop()
            elif c=='[':
                if not parenthesis_stack or parenthesis_stack[-1]!=']':
                    source[l-1]=source[l-1]+']'
                    #parenthesis_stack.append('{')
                    #i=l
                else:
                    parenthesis_stack.pop()
        split_str=source[i-1].split()

        if split_str and is_last_loop:
            if split_str[-1][-1]==':':
                is_last_loop=False
        if 'try:' in source[i-1].strip() \
                and i!=l:
            pos=source[i-1].find('try')
            indent_prefix=source[i-1][:pos]
            if indent_prefix not in try_stack:
                source=source[:l]
                source.append(indent_prefix+'except:')
                source.append(indent_prefix+'\t'+ 'pass')
                try_stack.append(indent_prefix)
                l=l+3
                
            else:
                try_stack.remove(indent_prefix)
                

        if ('except ' in source[i-1] or 'except:' in source[i-1]) \
                and i!=l:
            pos=source[i-1].find('except')
            indent_prefix=source[i-1][:pos]
            if indent_prefix not in try_stack:
                try_stack.append(source[i-1][:pos])

        if is_last_loop and len(split_str)>1:
            if 'if' in split_str[1:] and i==l:
                source[i-1]+=" else ''"

            for word in keywords:
                if word == source[i-1].split()[0]:
                    pos=source[i-1].find(word)
                    indent_prefix=source[i-1][:pos]
                    if source[l-1][-1]!=':':
                        source[l-1]=source[l-1]+':'
                    source.append(indent_prefix+'\t'+'pass')
                    if word=='except':
                        try_stack.append(indent_prefix)
                    l=l+1
                    is_last_loop=False
                    break



        df_graph=ASTBuilder('\n'.join(source[:l])).build_AST()
        #df_graph=ASTBuilder('\n'.join(source[:i]+source[l:except_count])).build_AST()
        #print '\n'.join(source[:l][-40:])
        #print '\n'.join(source[:i]+source[l:except_count])

        # print source[i-1], try_stack
        # print '\n'.join(source[:i]+source[l:except_count][-20:])
        #print '-'*40

        i=i-1
        count+=1

        if i==0:
            break

        if count>500:
            print  fname, "INFINITE LOOP"
            break

    print df_graph

    """Get Nearest Neighbours using Manhattan distance"""
    if df_graph:
        query_obj_types=[]
        query_obj_context=[]
        calls=[]
        other_calls=[]
        sql_query=[]
        assign_nodes=[]
        assign_nodes, call_nodes=df_graph.find_definitions_and_calls(query_obj)
        print call_nodes
        if assign_nodes:
            for node in assign_nodes:
                print node
                query_obj_types.extend(node.src)
                if node.context:
                    print node.context, context_features
                    for feature in context_features:
                        if feature=='arg_type':
                            sql_query.append('arg_types')
                            query_obj_context.extend(
                                extract_types(node.context))
                        elif feature=='arg_value':
                            sql_query.append('arg_values')
                            query_obj_context.extend(
                                process_tokens(node.context))
                        elif feature=='object_name':
                            sql_query.append('obj_name')
                            query_obj_context.extend(
                                process_obj_name(node.tgt)
                            )

            for call_type in call_nodes:
                if call_type=='object':
                    calls.extend(call_nodes[call_type])
                else:
                    other_calls.extend(call_nodes[call_type])

            sql_query.append('other_calls')
            sql_query.append('calls')

            query_count=Counter(calls+query_obj_context)

            conn=sqlite3.connect("pyty.db")
            c=conn.cursor()
            objects=[]
            for type in query_obj_types:
                sql_select='''SELECT {attr} FROM TRAINSET_{fold} WHERE obj_type=?'''.format(
                    attr=','.join(sql_query),fold=fold_no)
                results=c.execute(sql_select,(type,))
                if results:
                    for obj in results:
                        obj_count=Counter()
                        for i in range(len(obj)):
                            if obj[i]:
                                obj_count+=Counter(obj[i].split(','))
                        obj_calls=obj[-1].split(',') if obj[-1] else ''
                        score=compute_euclidean_dist(query_count, obj_count)
                        objects.append((obj_calls, score))

            objects=sorted(objects, key=lambda tup: tup[1])
            call_set=Counter()
            min_score =''
            for object in objects:
                if min_score=='':
                    min_score=object[1]
                if object[1]==min_score:
                    call_set.update(Counter(object[0])-query_count)
                elif len(call_set)<MAX_RECOS:
                    min_score=object[1]
                else:
                    break
            total=float(sum(call_set.values()))
            recommendations.extend([call[0] for call in call_set.most_common(MAX_RECOS)])
        return recommendations
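A hypothetical call to get_recos (all values are illustrative, none come from the source): it takes the raw query text, the training-fold number, the context features to match on ('arg_type', 'arg_value', 'object_name'), and a file name used only in log messages. It also assumes a populated pyty.db with a matching TRAINSET_<fold> table:

query = ("import re\n"
         "pat = re.compile('[a-z]+')\n"
         "pat.")
recos = get_recos(query, 1, ['arg_type', 'object_name'], 'query_01.py')
print recos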