Ejemplo n.º 1
0
def produce_funcBody_hash(function):
    '''
    Hash the abstracted and normalized body of a function.

    The body is taken from element 1 of pu.abstract(function, 4), passed
    through pu.normalize and then hashed with fnv1a_hash.

    Returns a two-element list: [hash_value, normalized_body].
    '''
    normalizedBody = pu.normalize(pu.abstract(function, 4)[1])
    return [fnv1a_hash(normalizedBody), normalizedBody]
Ejemplo n.º 2
0
def generate_cli(targetPath, isAbstraction):
    '''
    Build the hash index of a source tree and save it under ./hidx.

    targetPath:    directory of the project to index; trailing '/' and '\\'
                   are stripped and the last path component is used as the
                   project name.
    isAbstraction: "on" (case-insensitive) selects abstraction level 4 and
                   parseFile_deep_multi; anything else selects level 0 and
                   parseFile_shallow_multi.

    Every file returned by pu.loadSource is parsed in a worker pool. Each
    function whose abstracted + normalized body is longer than 50 characters
    is hashed with md5 and recorded; shorter functions are not counted.

    The index is written to hidx/hashmark_<level>_<project>.hidx (relative to
    the current working directory; the hidx directory is created if needed):
      - line 1: "<localVersion> <project> <numFile> <numFunc> <numLine>"
      - one line per body length: the length followed by its unique hashes,
        tab separated
      - a "=====" separator
      - one line per hash: the hash followed by (path, funcId) pairs,
        tab separated

    Progress and statistics are printed to stdout. Returns None. Calls
    sys.exit() if no source file could be loaded.
    '''
    # Every print below passes exactly one parenthesised argument, so the
    # output is the same whether print is a statement or a function.
    directory = targetPath.rstrip('/').rstrip("\\")
    absLevel = 4 if isAbstraction.lower() == "on" else 0

    proj = directory.replace('\\', '/').split('/')[-1]
    print("PROJ: %s" % proj)
    timeIn = time.time()

    print("[+] Loading source files... This may take a few minutes.")

    fileList = pu.loadSource(directory)
    numFile = len(fileList)

    if numFile == 0:
        print("[-] Error: Failed loading source files.")
        print("    Check if you selected proper directory, or if your project contains .c or .cpp files.")
        sys.exit()

    print("[+] Load complete. Generating hashmark...")

    if absLevel == 0:
        func = parseFile_shallow_multi
    else:
        func = parseFile_deep_multi

    # Keep one core free for the parent process when more than one exists.
    cpu_count = get_cpu_count.get_cpu_count()
    if cpu_count != 1:
        cpu_count -= 1

    # The width is only used to blank out the rest of the progress line, so
    # it is queried once instead of spawning `stty` for every file.
    columns = _console_columns()

    numFunc = 0
    numLine = 0
    projDic = {}      # body length -> list of hashes
    hashFileMap = {}  # hash -> flat list [path, funcId, path, funcId, ...]

    pool = multiprocessing.Pool(processes=cpu_count)
    try:
        for idx, tup in enumerate(pool.imap_unordered(func, fileList)):
            filePath = tup[0]
            functionInstanceList = tup[1]

            # NOTE(review): splits at the FIRST occurrence of the project
            # name; a path that contains that name earlier (e.g. /src/src)
            # would be cut too early -- confirm against pu.loadSource output.
            relPath = filePath.split(proj, 1)[1]
            fullName = proj + relPath
            pathOnly = relPath[1:]  # drop the leading path separator

            progress = 100 * float(idx + 1) / numFile
            buf = "\r%.2f%% %s" % (progress, fullName)
            buf += " " * (columns - len(buf))
            sys.stdout.write(buf)
            sys.stdout.flush()

            numFunc += len(functionInstanceList)

            if len(functionInstanceList) > 0:
                numLine += functionInstanceList[0].parentNumLoc

            for funcInst in functionInstanceList:
                funcInst.removeListDup()
                absBody = pu.normalize(pu.abstract(funcInst, absLevel)[1])
                funcLen = len(absBody)

                if funcLen > 50:
                    hashValue = md5(absBody).hexdigest()
                    projDic.setdefault(funcLen, []).append(hashValue)
                    hashFileMap.setdefault(hashValue, []).extend(
                        [pathOnly, funcInst.funcId])
                else:
                    numFunc -= 1  # functions under the length threshold are not counted
    finally:
        # Either every result has been consumed or an error is propagating;
        # in both cases the workers are no longer needed.
        pool.terminate()
        pool.join()

    print("")
    print("[+] Hash index successfully generated.")
    sys.stdout.write("[+] Saving hash index to file... ")
    sys.stdout.flush()

    # Make sure the output directory exists; only re-raise when it is really
    # missing afterwards (e.g. permission denied).
    try:
        os.mkdir("hidx")
    except OSError:
        if not os.path.isdir("hidx"):
            raise

    hidxPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
    packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
        numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'

    with open(hidxPath, 'w') as fp:
        fp.write(packageInfo)

        for key in sorted(projDic):
            fp.write(str(key) + '\t')
            # Unique hashes, sorted so the index file is reproducible.
            for h in sorted(set(projDic[key])):
                fp.write(h + '\t')
            fp.write('\n')

        fp.write('\n=====\n')

        for key in sorted(hashFileMap):
            fp.write(str(key) + '\t')
            for item in hashFileMap[key]:
                fp.write(str(item) + '\t')
            fp.write('\n')

    timeOut = time.time()

    print("(Done)")
    print("")
    print("[+] Elapsed time: %.02f sec." % (timeOut - timeIn))
    print("Program statistics:")
    print(" - " + str(numFile) + ' files;')
    print(" - " + str(numFunc) + ' functions;')
    print(" - " + str(numLine) + ' lines of code.')
    print("")
    print("[+] Hash index saved to: " + os.getcwd().replace("\\", "/") +
          "/" + hidxPath)


def _console_columns():
    '''
    Return the console width used to pad the progress line.

    Falls back to 80 when osName is "win" or when `stty size` cannot be run
    or does not print two fields.
    '''
    import subprocess

    if osName == "win":
        return 80

    try:
        # http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python
        _rows, columns = subprocess.check_output(['stty', 'size']).split()
        return int(columns)
    except (OSError, ValueError, subprocess.CalledProcessError):
        # stty is missing, stdin is not a terminal, or the output is malformed.
        return 80
Ejemplo n.º 3
0
    def generate(self):
        '''
        Build the hash index of the selected project and report progress
        in the GUI.

        Reads the project directory from self.directory and the abstraction
        level from self.absLevel (level 0 uses parseFile_shallow_multi, any
        other level uses parseFile_deep_multi). Status lines are appended to
        self.listProcess and self.progressbar is updated after every file.

        Every file returned by pu.loadSource is parsed in a worker pool. Each
        function whose abstracted + normalized body is longer than 50
        characters is hashed with md5 and recorded; shorter functions are not
        counted.

        The index is written to hidx/hashmark_<level>_<project>.hidx (relative
        to the current working directory; the hidx directory is created if
        needed):
          - line 1: "<localVersion> <project> <numFile> <numFunc> <numLine>"
          - one line per body length: the length followed by its unique
            hashes, tab separated
          - a "=====" separator
          - one line per hash: the hash followed by (path, funcId) pairs,
            tab separated

        On success self.btnOpenFolder is switched to state "normal". If no
        source file is found only an error message is shown.

        Always returns 0.
        '''
        # Trailing separators are stripped so that the project name below is
        # never empty (an empty name would make str.split raise ValueError).
        directory = self.directory.get().rstrip('/').rstrip("\\")
        absLevel = int(self.absLevel.get())
        self.progress = 0

        proj = directory.replace('\\', '/').split('/')[-1]
        timeIn = time.time()

        log = self.listProcess
        log.config(state="normal")
        log.insert(Tkinter.END,
                   "Loading source files... This may take a few minutes.")
        log.update()

        fileList = pu.loadSource(directory)
        numFile = len(fileList)

        if numFile == 0:
            log.insert(Tkinter.END, "Error: Failed loading source files.")
            log.insert(
                Tkinter.END,
                "- Check if you selected proper directory, or if your project contains .c or .cpp files."
            )
            return 0

        log.insert(Tkinter.END, "Load complete. Generating hashmark...")

        if absLevel == 0:
            func = parseFile_shallow_multi
        else:
            func = parseFile_deep_multi

        # Keep one core free for the GUI process when more than one exists.
        cpu_count = get_cpu_count.get_cpu_count()
        if cpu_count != 1:
            cpu_count -= 1

        numFunc = 0
        numLine = 0
        projDic = {}      # body length -> list of hashes
        hashFileMap = {}  # hash -> flat list [path, funcId, path, funcId, ...]

        pool = multiprocessing.Pool(processes=cpu_count)
        try:
            for idx, tup in enumerate(pool.imap_unordered(func, fileList)):
                filePath = tup[0]
                functionInstanceList = tup[1]

                # NOTE(review): splits at the FIRST occurrence of the project
                # name; a path containing that name earlier would be cut too
                # early -- confirm against pu.loadSource output.
                pathOnly = filePath.split(proj, 1)[1][1:]

                # NOTE(review): this is a 0..1 fraction; presumably the
                # progress bar's maximum is configured to 1 elsewhere --
                # confirm.
                progress = float(idx + 1) / numFile

                self.progressbar["value"] = progress
                self.progressbar.update()
                log.insert(Tkinter.END, "[+] " + filePath)
                log.see("end")

                numFunc += len(functionInstanceList)

                if len(functionInstanceList) > 0:
                    numLine += functionInstanceList[0].parentNumLoc

                for funcInst in functionInstanceList:
                    funcInst.removeListDup()
                    absBody = pu.normalize(pu.abstract(funcInst, absLevel)[1])
                    funcLen = len(absBody)

                    if funcLen > 50:
                        hashValue = md5(absBody).hexdigest()
                        projDic.setdefault(funcLen, []).append(hashValue)
                        hashFileMap.setdefault(hashValue, []).extend(
                            [pathOnly, funcInst.funcId])
                    else:
                        numFunc -= 1  # functions under the length threshold are not counted
        finally:
            # Either every result has been consumed or an error is
            # propagating; in both cases the workers are no longer needed.
            pool.terminate()
            pool.join()

        log.insert(Tkinter.END, "")
        log.insert(Tkinter.END, "Hash index successfully generated.")
        log.see("end")
        log.insert(Tkinter.END, "")
        log.see("end")
        log.insert(Tkinter.END, "Saving hash index to file...")
        log.see("end")

        # An already existing directory is fine; any other failure (e.g.
        # permission denied) is re-raised instead of being silently ignored.
        try:
            os.mkdir("hidx")
        except OSError:
            if not os.path.isdir("hidx"):
                raise

        hidxPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
        packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
            numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'

        with open(hidxPath, 'w') as fp:
            fp.write(packageInfo)

            for key in sorted(projDic):
                fp.write(str(key) + '\t')
                # Unique hashes, sorted so the index file is reproducible.
                for h in sorted(set(projDic[key])):
                    fp.write(h + '\t')
                fp.write('\n')

            fp.write('\n=====\n')

            for key in sorted(hashFileMap):
                fp.write(str(key) + '\t')
                for item in hashFileMap[key]:
                    fp.write(str(item) + '\t')
                fp.write('\n')

        timeOut = time.time()

        log.insert(Tkinter.END, "Done.")
        log.see("end")
        log.insert(Tkinter.END, "")
        log.insert(Tkinter.END,
                   "Elapsed time: %.02f sec." % (timeOut - timeIn))
        log.see("end")

        log.insert(Tkinter.END, "Program statistics:")
        log.insert(Tkinter.END, " - " + str(numFile) + ' files;')
        log.insert(Tkinter.END, " - " + str(numFunc) + ' functions;')
        log.insert(Tkinter.END, " - " + str(numLine) + ' lines of code.')
        log.see("end")

        log.insert(Tkinter.END, "")
        log.insert(
            Tkinter.END,
            "Hash index saved to: " + os.getcwd().replace("\\", "/") + "/" +
            hidxPath)
        log.see("end")
        self.btnOpenFolder.config(state="normal")

        return 0