Example #1
0
def generate_cli(targetPath, isAbstraction):
    """Generate a function-level hash index for a project from the command line.

    Loads the source files under ``targetPath`` via ``pu.loadSource``, parses
    each one, abstracts and normalizes every function body, and records an MD5
    digest for each body longer than 50 characters. The result is written to
    ``hidx/hashmark_<absLevel>_<proj>.hidx`` relative to the current working
    directory, and progress/statistics are printed to stdout.

    Args:
        targetPath: Path of the project directory. Trailing ``/`` and ``\\``
            characters are ignored; the last path component is used as the
            project name.
        isAbstraction: String flag. ``"on"`` (case-insensitive) selects
            abstraction level 4 and ``parseFiles_deep``; any other value
            selects level 0 and ``parseFiles_shallow``.

    Raises:
        SystemExit: If ``pu.loadSource`` returns no files.
    """
    # Local import, matching this function's existing habit of importing what
    # it needs for terminal-width detection inside the function body.
    import shutil

    # Strip any mix of trailing separators so that the last component is the
    # project name even for inputs such as "proj/\\".
    directory = targetPath.rstrip('/\\')

    absLevel = 4 if isAbstraction.lower() == "on" else 0

    proj = directory.replace('\\', '/').split('/')[-1]
    print("PROJ:", proj)
    timeIn = time.time()
    numFunc = 0
    numLine = 0

    print("[+] Loading source files... This may take a few minutes.")
    tupleList = pu.loadSource(directory)

    numFile = len(tupleList)
    if numFile == 0:
        print("[-] Error: Failed loading source files.")
        print(
            "    Check if you selected proper directory, or if your project contains .c, .cpp or java files."
        )
        sys.exit()

    print("[+] Load complete. Generating hashmark...")

    func = parseFiles_shallow if absLevel == 0 else parseFiles_deep

    listOfHashJsons = []

    for idx, tup in enumerate(tupleList):
        parseResult = func(tup)
        filePath = parseResult[0]
        functionInstanceList = parseResult[1]
        language = parseResult[2]

        # Display name: the path starting at the project directory.
        fullName = proj + filePath.split(proj, 1)[1]

        if osName == "win":
            columns = 80
        else:
            # Ask the OS for the width instead of spawning `stty size` for
            # every file; falls back to 80 columns when there is no terminal.
            columns = shutil.get_terminal_size(fallback=(80, 24)).columns

        # Redraw a single status line, padded to overwrite the previous one.
        progress = 100 * float(idx + 1) / numFile
        buf = "\r%.2f%% %s" % (progress, fullName)
        buf += " " * (int(columns) - len(buf))
        sys.stdout.write(buf)
        sys.stdout.flush()

        numFunc += len(functionInstanceList)

        if functionInstanceList:
            # The file's line count is read from its first function only.
            numLine += functionInstanceList[0].parentNumLoc

        for function in functionInstanceList:
            function.removeListDup()
            _, absBody = pu.new_abstract(function, absLevel, language)
            absBody = pu.normalize(absBody)
            funcLen = len(absBody)

            if funcLen <= 50:
                # Functions at or under the length threshold are not indexed
                # and do not count towards the function total.
                numFunc -= 1
                continue

            hashValue = md5(absBody.encode('utf-8')).hexdigest()
            # Length of everything up to and including "<proj>/", so the
            # stored path is relative to the project directory.
            # NOTE(review): this assumes parentFile uses "/" separators;
            # confirm for Windows paths.
            cutLength = len(
                str(function.parentFile.split(str(proj) + "/")[0]) +
                str(proj) + "/")
            listOfHashJsons.append({
                "file": str(function.parentFile[cutLength:]),
                "function id": str(function.funcId),
                "function length": str(funcLen),
                "hash value": str(hashValue),
            })

    print("")
    print("[+] Hash index successfully generated.")
    print("[+] Saving hash index to file...", end=' ')

    # Create the output directory if needed; an existing one is fine.
    os.makedirs("hidx", exist_ok=True)
    hidxPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"

    # First line is a space-separated header; the rest is the list's repr.
    packageInfo = ' '.join(
        str(item)
        for item in (localVersion, proj, numFile, numFunc, numLine)) + '\n'
    with open(hidxPath, 'w', encoding="utf-8") as fp:
        fp.write(packageInfo)
        fp.write(str(listOfHashJsons))

    timeOut = time.time()

    print("(Done)")
    print("")
    print("[+] Elapsed time: %.02f sec." % (timeOut - timeIn))
    print("Program statistics:")
    print(" - " + str(numFile) + ' files;')
    print(" - " + str(numFunc) + ' functions;')
    print(" - " + str(numLine) + ' lines of code.')
    print("")
    print("[+] Hash index saved to: " + os.getcwd().replace("\\", "/") +
          "/" + hidxPath)
Example #2
0
    def generate(self):
        """Generate a function-level hash index and report progress in the GUI.

        Reads the project directory from ``self.directory`` and the
        abstraction level from ``self.absLevel``, loads the source files via
        ``pu.loadSource``, parses each one, abstracts and normalizes every
        function body, and records an MD5 digest for each body longer than 50
        characters. The result is written to
        ``hidx/hashmark_<absLevel>_<proj>.hidx`` relative to the current
        working directory. Status messages are appended to
        ``self.listProcess`` and ``self.progressbar`` is updated per file.

        If no source files are found, an error message is shown and nothing
        is written.

        Returns:
            Always ``0``.
        """
        # Strip trailing separators (as generate_cli does) so the last path
        # component is the project name; otherwise a value like "/src/proj/"
        # yields an empty name and breaks the path handling below.
        directory = self.directory.get().rstrip('/\\')
        absLevel = int(self.absLevel.get())
        self.progress = 0

        proj = directory.replace('\\', '/').split('/')[-1]
        timeIn = time.time()
        numFunc = 0
        numLine = 0

        self.listProcess.config(state="normal")
        self.listProcess.insert(
            Tkinter.END,
            "Loading source files... This may take a few minutes.")
        self.listProcess.update()

        tupleList = pu.loadSource(directory)

        numFile = len(tupleList)
        if numFile == 0:
            self.listProcess.insert(Tkinter.END,
                                    "Error: Failed loading source files.")
            self.listProcess.insert(
                Tkinter.END,
                "- Check if you selected proper directory, or if your project contains .c, .cpp, .py, .js, .go or .java files."
            )
            return 0

        self.listProcess.insert(Tkinter.END,
                                "Load complete. Generating hashmark...")

        func = parseFiles_shallow if absLevel == 0 else parseFiles_deep

        listOfHashJsons = []

        for idx, tup in enumerate(tupleList):
            parseResult = func(tup)
            filePath = parseResult[0]
            functionInstanceList = parseResult[1]
            language = parseResult[2]

            # NOTE(review): the value is a 0..1 fraction; confirm the
            # progress bar's maximum is configured to match.
            progress = float(idx + 1) / numFile

            self.progressbar["value"] = progress
            self.progressbar.update()
            self.listProcess.insert(Tkinter.END, "[+] " + filePath)
            self.listProcess.see("end")

            numFunc += len(functionInstanceList)

            if functionInstanceList:
                # The file's line count is read from its first function only.
                numLine += functionInstanceList[0].parentNumLoc

            for function in functionInstanceList:
                function.removeListDup()
                _, absBody = pu.new_abstract(function, absLevel, language)
                absBody = pu.normalize(absBody)
                funcLen = len(absBody)

                if funcLen <= 50:
                    # Functions at or under the length threshold are not
                    # indexed and do not count towards the function total.
                    numFunc -= 1
                    continue

                hashValue = md5(absBody.encode('utf-8')).hexdigest()
                # Length of everything up to and including "<proj>/", so the
                # stored path is relative to the project directory.
                # NOTE(review): this assumes parentFile uses "/" separators;
                # confirm for Windows paths.
                cutLength = len(
                    str(function.parentFile.split(str(proj) + "/")[0]) +
                    str(proj) + "/")
                listOfHashJsons.append({
                    "file": str(function.parentFile[cutLength:]),
                    "function id": str(function.funcId),
                    "function length": str(funcLen),
                    "hash value": str(hashValue),
                })

        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(Tkinter.END,
                                "Hash index successfully generated.")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END,
                                "Saving hash index to file...")
        self.listProcess.see("end")

        # Create the output directory if needed; an existing one is fine.
        os.makedirs("hidx", exist_ok=True)
        hidxPath = "hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"

        # First line is a space-separated header; the rest is the list's repr.
        packageInfo = ' '.join(
            str(item)
            for item in (localVersion, proj, numFile, numFunc, numLine)) + '\n'
        with open(hidxPath, 'w', encoding="utf-8") as fp:
            fp.write(packageInfo)
            fp.write(str(listOfHashJsons))

        timeOut = time.time()

        self.listProcess.insert(Tkinter.END, "Done.")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(
            Tkinter.END, "Elapsed time: %.02f sec." % (timeOut - timeIn))
        self.listProcess.see("end")

        self.listProcess.insert(Tkinter.END, "Program statistics:")
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numFile) + ' files;')
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numFunc) + ' functions;')
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numLine) + ' lines of code.')
        self.listProcess.see("end")

        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(
            Tkinter.END,
            "Hash index saved to: " + os.getcwd().replace("\\", "/") +
            "/" + hidxPath)
        self.listProcess.see("end")
        self.btnOpenFolder.config(state="normal")

        return 0