コード例 #1
0
def produce_funcBody_hash(function):
    '''
    Hash the abstracted and normalized body of a parsed function.

    function: the parsed function object handed to pu.abstract.
    Returns a two-item list: [hash of the normalized body, normalized body].
    '''
    # pu.abstract is invoked with level 4; only element [1] of its result is used.
    abstracted = pu.abstract(function, 4)
    normalized_body = pu.normalize(abstracted[1])
    return [fnv1a_hash(normalized_body), normalized_body]
コード例 #2
0
def produce_slice_hash(variable_list, slice_content):
    '''
    Hash an abstracted and normalized program slice.

    variable_list: passed to pu.abstract_slice together with the slice text.
    slice_content: String. original slice.
    Returns a two-item list: [hash of the normalized slice, normalized slice].
    '''
    absSlice = pu.removeComment(slice_content)
    # Abstract the comment-free text. The raw slice_content used to be passed
    # here, which silently discarded the result of removeComment above.
    # NOTE(review): hashes of slices that contain comments change with this
    # fix, so any stored slice hashes produced by the old code must be rebuilt.
    absSlice = pu.abstract_slice(absSlice, variable_list)
    absSlice = pu.normalize(absSlice)
    hash_value = fnv1a_hash(absSlice)

    return [hash_value, absSlice]
コード例 #3
0
def detect_source_code():
    '''
    Scan every ".c" function file under config.src_func_path against the
    vulnerability repository and write an HTML report to config.result_path.

    For each file the function body hash is compared with the first hash of
    every repository record ("Bingo(1)"). When nothing matches, the file's
    slices are hashed into a bit vector and each record is checked again:
    a hit on the record's first hash is reported as "Bingo(3)", a hit on all
    of its remaining hashes as "Bingo(2)". At most one match is reported per
    file. Progress and timing information is printed to stdout.

    NOTE(review): the report file is only closed on the normal path at the
    end of the function; an exception raised mid-scan leaves it open.
    '''
    bitvector_size = config.bloomfilter_size
    bitvector = bitarray.bitarray(bitvector_size)
    # Maps a slice hash to the line_dpd value it was built from,
    # e.g. {1839273: [1,5,7,9,10], 34502394: [6,7,8,10,11,12]}.
    # NOTE(review): this dict is never cleared between files (only the bit
    # vector is reset), so it keeps growing for the whole run.
    bitvector_dic = {}

    # Load the vulnerability repository; each record is read below through
    # its 'hashvalue' list and its 'lineNumber' entry.
    vul_dic = {}
    with open(config.vul_repo_file_path, 'r') as f:
        vul_dic = json.load(f, encoding='gbk')
    print "[+]import vul completed."

    # Start from a fresh report file and emit the static HTML header.
    if os.path.exists(config.result_path):
        os.remove(config.result_path)
    outfile = open(config.result_path, 'a')
    outfile.write("""
<!DOCTYPE html>
<html>
<head>
    <title>Result - Report</title>
    <style type="text/css">
    .container { padding: 3px 3px 3px 3px; font-size: 14px; }
    .patch { background-color: #CCCCCC; border: 2px solid #555555; margin: 0px 0px 5px 0px }
    .source { background-color: #DDDDDD; padding: 3px 3px 3px 3px; margin: 0px 0px 5px 0px }
    .filepath { font-size: small; font-weight: bold; color: #0000AA; padding: 5px 5px 5px 5px; }
    .codechunk { font-family: monospace; font-size: small; white-space: pre-wrap; padding: 0px 0px 0px 50px; }
    .linenumber { font-family: monospace; font-size: small; float: left; color: #777777; }
    </style>
    <script language="javascript">
        function togglePrev(node) {
            var targetDiv = node.previousSibling;
            targetDiv.style.display = (targetDiv.style.display=='none')?'block':'none';
            node.innerHTML = (node.innerHTML=='+ show +')?'- hide -':'+ show +';
        }
        function toggleNext(node) {
            var targetDiv = node.nextSibling;
            targetDiv.style.display = (targetDiv.style.display=='none')?'block':'none';
            node.innerHTML = (node.innerHTML=='+ show +')?'- hide -':'+ show +';
        }
    </script>
</head>
<body>
<div style="width: 100%; margin: 0px auto">""")

    # First pass: count the ".c" files so progress can be shown as index/total.
    total = 0
    for root, dirs, files in os.walk(config.src_func_path):
        for func_file in files:
            if not func_file.endswith('.c'):
                continue
            total += 1

    # Second pass: analyse each ".c" file.
    index = 1
    report_num = 1
    for root, dirs, files in os.walk(config.src_func_path):
        for func_file in files:
            if not func_file.endswith('.c'):
                continue

            # Step 1: hash the abstracted/normalized function body and compare
            # it with the first hash of every repository record.
            print "-----------------------------------------------------------"
            print index, "/", total, os.path.join(root, func_file), "started."

            # Disabled debugging aids: resume from a given index / analyse a
            # single file only.
            #if index < 2642:
                #index += 1
                #continue
            #if func_file != "ssl#~d1_lib.c$dtls1_free$132-181.c":
                #continue


            index += 1
            start_time = time.time()

            # Parse the file; the variable list of the first function is
            # needed later for slice abstraction.
            function = pu.parseFile_deep(os.path.join(root, func_file))
            if len(function) == 0:
                print "The file <", os.path.join(root, func_file), "> has ", len(function), " funcitons."
                continue
            # More than one function is only reported; function[0] is still used.
            if len(function) != 1:
                print "The file <", os.path.join(root, func_file), "> has ", len(function), " funcitons."

            # Size threshold: skip functions whose normalized body is shorter
            # than 50 characters.
            if len(pu.normalize(function[0].funcBody)) < 50:
                continue
            variable_list = function[0].variableList
            parse_time = time.time()
            print "parse function time:", str(parse_time - start_time), "s."

            temp = produce_slice.produce_funcBody_hash(function[0])

            # Guard against an empty-string result from the hashing helper.
            if temp == "":
                continue
            hash_value = temp[0]
            for vulfunc_file_name, record in vul_dic.items():
                if record['hashvalue'][0] == hash_value:
                    print func_file, "  Bingo(1) !", "match vul_function:", vulfunc_file_name
                    report(outfile, os.path.join(root, func_file), 0, vulfunc_file_name, "", report_num, "Bingo(1)")
                    report_num += 1
                    break

            # Step 2: the whole body did not match any record, so build the
            # slices of the current function and record their hashes in the
            # bit vector.
            else:  # for/else: runs only when the loop above finished without 'break'.
                type1_time = time.time()
                print "detect type1 time:", str(type1_time - parse_time), "s."

                slice_time1 = time.time()
                dpd_content = ""
                func_content = []
                # Files without a dependency file of the same name under
                # config.src_funcDpd_path are skipped ('continue' applies to
                # the enclosing per-file loop).
                if not os.path.exists(os.path.join(config.src_funcDpd_path, func_file)):
                    continue
                with open(os.path.join(config.src_funcDpd_path, func_file), 'r') as ff:
                    temp = ff.readlines()
                    # Concatenate the whole file with all newlines removed.
                    dpd_content = "".join("".join(temp).split('\n'))
                dpd_dic = produce_slice.slice_from_project(dpd_content)

                with open(os.path.join(root, func_file), 'r') as ff:
                    func_content = ff.readlines()

                # Build the bit vector from dpd_dic: one bit per slice hash.
                bitvector.setall(0)
                flag1 = True  # becomes False when a slice cannot be extracted
                for line_num, line_dpd in dpd_dic.items():
                    slice_content = produce_slice.get_slice_content(func_content, line_dpd)
                    # An empty list means "nothing to hash for this entry";
                    # an empty string is treated as a broken dependency file.
                    if slice_content == []:
                        continue
                    if slice_content == "":
                        flag1 = False
                        break
                    temp1 = produce_slice.produce_slice_hash(variable_list, slice_content)
                    slice_hash = temp1[0]
                    # The hash is used directly as the bit index.
                    bitvector[slice_hash] = 1
                    bitvector_dic[slice_hash] = line_dpd
                if not flag1:
                    print "[Error]The dpd-files wrong."
                    continue
                slice_time2 = time.time()

                print "produce slices time:", str(slice_time2 - slice_time1), "s."

                # Step 3: check every repository record against the bit vector
                # and stop at the first match.
                detect_time1 = time.time()
                for vul_filename, records in vul_dic.items():
                    # Bingo(3): one slice hash equals the record's first hash.
                    if bitvector[records['hashvalue'][0]] == 1:
                        print func_file, "   Bingo(3) !", vul_filename, "------------"
                        line_list = bitvector_dic[records['hashvalue'][0]]
                        line_list = list(set(line_list))
                        line_list.sort()
                        report(outfile, os.path.join(root, func_file), line_list, vul_filename, "", report_num, "Bingo(3)")
                        report_num += 1
                        break
                    # Records with a single hash have nothing left to compare.
                    if len(records['hashvalue']) == 1:
                        continue
                    # Bingo(2): every remaining hash of the record must have
                    # its bit set.
                    flag = True
                    matched_hash = []
                    for n in records['hashvalue'][1:]:
                        if bitvector[n] == 1:
                            matched_hash.append(n)
                        else:
                            flag = False
                            matched_hash = []
                            break
                    if flag:
                        print func_file, "   Bingo(2) !", vul_filename, records['lineNumber'], "------------"
                        # Merge, de-duplicate and sort the line data of all
                        # matched slices for the report.
                        line_list = []
                        for i in matched_hash:
                            line_list.extend(bitvector_dic[i])
                        line_list = list(set(line_list))
                        line_list.sort()
                        report(outfile, os.path.join(root, func_file), line_list, vul_filename, records['lineNumber'], report_num, "Bingo(2)")
                        report_num += 1
                        break
                detect_time2 = time.time()
                print "detect time: ", str(detect_time2 - detect_time1), "s."
                print "total time:", str(detect_time2 - start_time), "s."

    # Close the HTML document and the report file.
    outfile.write("""
</div>
</body>
</html>""")
    outfile.close()
コード例 #4
0
def generate_cli(targetPath, isAbstraction):
    """Build a function-level hash index for a source tree and save it to ./hidx.

    Args:
        targetPath: Path of the project directory; trailing '/' and '\\'
            characters are stripped.
        isAbstraction: "on" (case-insensitive) selects abstraction level 4,
            anything else selects level 0.

    The index is written to ``hidx/hashmark_<level>_<project>.hidx``: a
    header line (version, project, file/function/line counts) followed by
    ``str()`` of a list of per-function dictionaries. Functions whose
    normalized body is 50 characters or shorter are left out of the index
    and out of the function count.

    Calls ``sys.exit()`` when no source files could be loaded.
    """
    import shutil

    directory = targetPath.rstrip('/').rstrip("\\")
    absLevel = 4 if isAbstraction.lower() == "on" else 0

    proj = directory.replace('\\', '/').split('/')[-1]
    print("PROJ:", proj)
    timeIn = time.time()
    numFunc = 0
    numLine = 0

    print("[+] Loading source files... This may take a few minutes.")
    tupleList = pu.loadSource(directory)

    numFile = len(tupleList)
    if numFile == 0:
        print("[-] Error: Failed loading source files.")
        print(
            "    Check if you selected proper directory, or if your project contains .c, .cpp or java files."
        )
        sys.exit()

    print("[+] Load complete. Generating hashmark...")

    parse = parseFiles_shallow if absLevel == 0 else parseFiles_deep
    listOfHashJsons = []

    for idx, tup in enumerate(tupleList):
        parseResult = parse(tup)
        filePath = parseResult[0]
        functionInstanceList = parseResult[1]
        language = parseResult[2]

        fullName = proj + filePath.split(proj, 1)[1]

        # Width of the progress line. Querying the terminal through the
        # standard library avoids spawning an `stty` process per file;
        # 80 columns is the fallback, and the fixed value on Windows.
        if osName == "win":
            columns = 80
        else:
            columns = shutil.get_terminal_size((80, 24)).columns

        progress = 100 * float(idx + 1) / numFile
        buf = "\r%.2f%% %s" % (progress, fullName)
        # Pad to the terminal width so a shorter line overwrites a longer one.
        buf += " " * (columns - len(buf))
        sys.stdout.write(buf)
        sys.stdout.flush()

        numFunc += len(functionInstanceList)

        if len(functionInstanceList) > 0:
            # The line count of the parent file is read from its first function.
            numLine += functionInstanceList[0].parentNumLoc

        for function in functionInstanceList:
            function.removeListDup()
            _, absBody = pu.new_abstract(function, absLevel, language)
            absBody = pu.normalize(absBody)
            funcLen = len(absBody)
            if funcLen > 50:
                hashValue = md5(absBody.encode('utf-8')).hexdigest()
                # Length of everything up to and including "<proj>/", so the
                # stored path is relative to the project root.
                cutLength = len(
                    str(function.parentFile.split(str(proj) + "/")[0]) +
                    str(proj) + "/")
                Json = {}
                Json["file"] = str(function.parentFile[cutLength:])
                Json["function id"] = str(function.funcId)
                Json["function length"] = str(funcLen)
                Json["hash value"] = str(hashValue)
                listOfHashJsons.append(Json)
            else:
                numFunc -= 1  # decrement numFunc by 1 if funclen is under threshold

    print("")
    print("[+] Hash index successfully generated.")
    print("[+] Saving hash index to file...", end=' ')

    # An already existing directory is fine; any other failure (e.g. missing
    # permissions) is raised here instead of being silently swallowed.
    os.makedirs("hidx", exist_ok=True)
    packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
        numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'
    with open("hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx",
              'w',
              encoding="utf-8") as fp:
        fp.write(packageInfo)
        fp.write(str(listOfHashJsons))

    timeOut = time.time()

    print("(Done)")
    print("")
    print("[+] Elapsed time: %.02f sec." % (timeOut - timeIn))
    print("Program statistics:")
    print(" - " + str(numFile) + ' files;')
    print(" - " + str(numFunc) + ' functions;')
    print(" - " + str(numLine) + ' lines of code.')
    print("")
    print("[+] Hash index saved to: " + os.getcwd().replace("\\", "/") +
          "/hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx")
コード例 #5
0
    def generate(self):
        """Generate the hash index for the selected directory, reporting to the GUI.

        Reads the directory from ``self.directory`` and the abstraction level
        from ``self.absLevel``. Progress messages go to ``self.listProcess``
        and ``self.progressbar``. The index is saved to
        ``hidx/hashmark_<level>_<project>.hidx`` as a header line followed by
        ``str()`` of a list of per-function dictionaries; functions whose
        normalized body is 50 characters or shorter are skipped and not
        counted. ``self.btnOpenFolder`` is enabled once the index is written.

        Returns:
            Always 0, including when no source files could be loaded.
        """
        directory = self.directory.get()
        absLevel = int(self.absLevel.get())
        self.progress = 0

        proj = directory.replace('\\', '/').split('/')[-1]
        timeIn = time.time()
        numFunc = 0
        numLine = 0

        self.listProcess.config(state="normal")
        self.listProcess.insert(
            Tkinter.END,
            "Loading source files... This may take a few minutes.")
        self.listProcess.update()

        tupleList = pu.loadSource(directory)

        numFile = len(tupleList)
        if numFile == 0:
            self.listProcess.insert(Tkinter.END,
                                    "Error: Failed loading source files.")
            self.listProcess.insert(
                Tkinter.END,
                "- Check if you selected proper directory, or if your project contains .c, .cpp, .py, .js, .go or .java files."
            )
            return 0

        self.listProcess.insert(Tkinter.END,
                                "Load complete. Generating hashmark...")

        parse = parseFiles_shallow if absLevel == 0 else parseFiles_deep
        listOfHashJsons = []

        for idx, tup in enumerate(tupleList):
            parseResult = parse(tup)
            filePath = parseResult[0]
            functionInstanceList = parseResult[1]
            language = parseResult[2]
            progress = float(idx + 1) / numFile

            self.progressbar["value"] = progress
            self.progressbar.update()
            self.listProcess.insert(Tkinter.END, "[+] " + filePath)
            self.listProcess.see("end")

            numFunc += len(functionInstanceList)

            if len(functionInstanceList) > 0:
                # The line count of the parent file is read from its first
                # function.
                numLine += functionInstanceList[0].parentNumLoc

            for function in functionInstanceList:
                function.removeListDup()
                _, absBody = pu.new_abstract(function, absLevel, language)
                absBody = pu.normalize(absBody)
                funcLen = len(absBody)

                if funcLen > 50:
                    hashValue = md5(absBody.encode('utf-8')).hexdigest()
                    # Length of everything up to and including "<proj>/", so
                    # the stored path is relative to the project root.
                    cutLength = len(
                        str(function.parentFile.split(str(proj) + "/")[0]) +
                        str(proj) + "/")
                    Json = {}
                    Json["file"] = str(function.parentFile[cutLength:])
                    Json["function id"] = str(function.funcId)
                    Json["function length"] = str(funcLen)
                    Json["hash value"] = str(hashValue)
                    listOfHashJsons.append(Json)
                else:
                    numFunc -= 1  # decrement numFunc by 1 if funclen is under threshold

        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(Tkinter.END,
                                "Hash index successfully generated.")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END,
                                "Saving hash index to file...")
        self.listProcess.see("end")

        # An already existing directory is fine; any other failure (e.g.
        # missing permissions) is raised here instead of being swallowed.
        os.makedirs("hidx", exist_ok=True)
        packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
            numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'
        with open("hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx",
                  'w',
                  encoding="utf-8") as fp:
            fp.write(packageInfo)
            fp.write(str(listOfHashJsons))

        timeOut = time.time()

        self.listProcess.insert(Tkinter.END, "Done.")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(
            Tkinter.END, "Elapsed time: %.02f sec." % (timeOut - timeIn))
        self.listProcess.see("end")

        self.listProcess.insert(Tkinter.END, "Program statistics:")
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numFile) + ' files;')
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numFunc) + ' functions;')
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numLine) + ' lines of code.')
        self.listProcess.see("end")

        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(
            Tkinter.END,
            "Hash index saved to: " + os.getcwd().replace("\\", "/") +
            "/hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx")
        self.listProcess.see("end")
        self.btnOpenFolder.config(state="normal")

        return 0
def process_func_files(file_path):
    print file_path, "   started.."
    bitvector_size = config.bloomfilter_size
    bitvector = bitarray.bitarray(bitvector_size)
    bitvector_dic = {
    }  # record the slice's hashvalue and the line numbers. eg: {1839273: [1,5,7,9,10], 34502394: [6,7,8,10,11,12]}

    vul_dic = {}
    with open(config.vul_repo_file_path, 'r') as f:
        vul_dic = json.load(f, encoding='gbk')
    #print "[+]import vul completed."

    #index = 1
    #report_num = 1
    if not file_path.endswith(".c"):
        return

    # get variable list
    function = pu.parseFile_deep(file_path)

    if len(function) == 0:
        print "The file <", file_path, "> has ", len(function), "functions."
        return
    if len(function) != 1:
        print "The file <", file_path, "> has ", len(function), "functions."

    # a threshold for function
    if len(pu.normalize(function[0].funcBody)) < 50:
        return
    variable_list = function[0].variableList

    temp = produce_slice.produce_funcBody_hash(function[0])
    if temp == "":
        return
    hash_value = temp[0]
    for vulfunc_file_name, record in vul_dic.items():
        if record['hashvalue'][0] == hash_value:
            lock.acquire()
            report(file_path, 0, vulfunc_file_name, "", "Bingo(1)")
            lock.release()
            return

    dpd_content = ""
    func_content = []

    if not os.path.exists(
            os.path.join(config.src_funcDpd_path,
                         os.path.basename(file_path))):
        return
    with open(
            os.path.join(config.src_funcDpd_path, os.path.basename(file_path)),
            "r") as ff:
        temp = ff.readlines()
        dpd_content = "".join("".join(temp).split("\n"))
    dpd_dic = produce_slice.slice_from_project(dpd_content)
    with open(file_path, "r") as ff:
        func_content = ff.readlines()

    # build a bitvector according to dpd_dic
    bitvector.setall(0)
    for line_num, line_dpd in dpd_dic.items():
        slice_content = produce_slice.get_slice_content(func_content, line_dpd)
        if slice_content == []:
            continue
        if slice_content == "":
            print "[Error]The dpd-files wrong."
            return

        temp1 = produce_slice.produce_slice_hash(variable_list, slice_content)
        slice_hash = temp1[0]
        bitvector[slice_hash] = 1
        bitvector_dic[slice_hash] = line_dpd

        for vul_filename, record in vul_dic.items():
            if bitvector[record['hashvalue'][0]] == 1:
                line_list = bitvector_dic[record['hashvalue'][0]]
                line_list = list(set(line_list))
                line_list.sort()
                lock.acquire()
                report(file_path, line_list, vul_filename, "", "Bingo(3)")
                lock.release()
                return
            if len(record['hashvalue']) == 1:
                continue
            flag = True
            matched_hash = []
            for n in record['hashvalue'][1:]:
                if bitvector[n] == 1:
                    matched_hash.append(n)
                else:
                    flag = False
                    matched_hash = []
                    break
            if flag:
                line_list = []
                for i in matched_hash:
                    line_list.extend(bitvector_dic[i])
                line_list = list(set(line_list))
                line_list.sort()
                lock.acquire()
                report(file_path, line_list, vul_filename,
                       record['lineNumber'], "Bingo(2)")
                lock.release()
                return
    return
コード例 #7
0
                continue
            with open(os.path.join(dpd_file_path, func_file), 'r') as ff:
                temp = ff.readlines()
                dpd_content = "".join("".join(temp).split('\n'))
            dpd_dic = produce_slice.slice_from_project(dpd_content)

            with open(os.path.join(root, func_file), 'r') as ff:
                func_content = ff.readlines()

            # build a bitvector according to dpd_dic
            bitvector.setall(0)
            for line_num, line_dpd in dpd_dic.items():
                slice_content = produce_slice.get_slice_content(
                    func_content, line_dpd)
                slice_content = pu.removeComment(slice_content)
                slice_content = pu.normalize(slice_content)
                slice_hash = produce_slice.fnv1a_hash(slice_content)
                #---------------------------------------
                #print "slice_hash:", slice_hash
                #print "line_dpd:", line_dpd
                #=======================================
                bitvector[slice_hash] = 1
                bitvector_dic[slice_hash] = line_dpd

            #detect the vul_slice according the bitvector
            for vul_filename, records in vul_dic.items():
                #---------------------------
                #if vul_filename == r"(BadFunc)CVE-2008-5300$net#~unix#~garbage.c$scan_inflight.c":
                #print "\n\nvul hashvalue:", records['hashvalue']
                #print "vul dpd line numbers:", records['lineNumber']
                #============================
コード例 #8
0
ファイル: hmark.py プロジェクト: iotcube/hmark
def generate_cli(targetPath, isAbstraction):
    import subprocess
    directory = targetPath.rstrip('/').rstrip("\\")

    if isAbstraction.lower() == "on":
        absLevel = 4
    else:
        absLevel = 0

    proj = directory.replace('\\', '/').split('/')[-1]
    print "PROJ:", proj
    timeIn = time.time()
    numFile = 0
    numFunc = 0
    numLine = 0

    projDic = {}
    hashFileMap = {}

    print "[+] Loading source files... This may take a few minutes."

    fileList = pu.loadSource(directory)
    numFile = len(fileList)

    if numFile == 0:
        print "[-] Error: Failed loading source files."
        print "    Check if you selected proper directory, or if your project contains .c or .cpp files."
        sys.exit()
    else:
        print "[+] Load complete. Generating hashmark..."

        if absLevel == 0:
            func = parseFile_shallow_multi
        else:
            func = parseFile_deep_multi

        cpu_count = get_cpu_count.get_cpu_count()
        if cpu_count != 1:
            cpu_count -= 1

        pool = multiprocessing.Pool(processes=cpu_count)
        for idx, tup in enumerate(pool.imap_unordered(func, fileList)):
            f = tup[0]
            functionInstanceList = tup[1]

            fullName = proj + f.split(proj, 1)[1]
            pathOnly = f.split(proj, 1)[1][1:]

            if osName == "win":
                columns = 80
            else:
                try:
                    # http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python
                    rows, columns = subprocess.check_output(['stty',
                                                             'size']).split()
                except:
                    columns = 80

            progress = 100 * float(idx + 1) / numFile
            buf = "\r%.2f%% %s" % (progress, fullName)
            buf += " " * (int(columns) - len(buf))
            sys.stdout.write(buf)
            sys.stdout.flush()

            numFunc += len(functionInstanceList)

            if len(functionInstanceList) > 0:
                numLine += functionInstanceList[0].parentNumLoc

            for f in functionInstanceList:
                f.removeListDup()
                path = f.parentFile
                # print "\nORIGINALLY:", f.funcBody
                absBody = pu.abstract(f, absLevel)[1]
                absBody = pu.normalize(absBody)
                funcLen = len(absBody)
                # print "\n", funcLen, absBody

                if funcLen > 50:
                    hashValue = md5(absBody).hexdigest()

                    try:
                        projDic[funcLen].append(hashValue)
                    except KeyError:
                        projDic[funcLen] = [hashValue]
                    try:
                        hashFileMap[hashValue].extend([pathOnly, f.funcId])
                    except KeyError:
                        hashFileMap[hashValue] = [pathOnly, f.funcId]
                else:
                    numFunc -= 1  # decrement numFunc by 1 if funclen is under threshold

        print ""
        print "[+] Hash index successfully generated."
        print "[+] Saving hash index to file...",

        packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
            numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'
        with open("hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx",
                  'w') as fp:
            fp.write(packageInfo)

            for key in sorted(projDic):
                fp.write(str(key) + '\t')
                for h in list(set(projDic[key])):
                    fp.write(h + '\t')
                fp.write('\n')

            fp.write('\n=====\n')

            for key in sorted(hashFileMap):
                fp.write(str(key) + '\t')
                for f in hashFileMap[key]:
                    fp.write(str(f) + '\t')
                fp.write('\n')

        timeOut = time.time()

        print "(Done)"
        print ""
        print "[+] Elapsed time: %.02f sec." % (timeOut - timeIn)
        print "Program statistics:"
        print " - " + str(numFile) + ' files;'
        print " - " + str(numFunc) + ' functions;'
        print " - " + str(numLine) + ' lines of code.'
        print ""
        print "[+] Hash index saved to: " + os.getcwd().replace(
            "\\",
            "/") + "/hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx"
コード例 #9
0
ファイル: hmark.py プロジェクト: iotcube/hmark
    def generate(self):
        """Generate the hash index for the selected directory, reporting to the GUI.

        Reads the directory from ``self.directory`` and the abstraction level
        from ``self.absLevel``. Files are parsed in a multiprocessing pool;
        progress messages go to ``self.listProcess`` and ``self.progressbar``.
        The index is saved to ``hidx/hashmark_<level>_<project>.hidx`` as a
        header line, one tab-separated line of distinct hashes per function
        length, a "=====" separator and one tab-separated line of
        (path, function id) entries per hash. Functions whose normalized body
        is 50 characters or shorter are skipped and not counted.
        ``self.btnOpenFolder`` is enabled once the index is written.

        Returns:
            Always 0, including when no source files could be loaded.
        """
        directory = self.directory.get()
        absLevel = int(self.absLevel.get())
        self.progress = 0

        proj = directory.replace('\\', '/').split('/')[-1]
        timeIn = time.time()
        numFunc = 0
        numLine = 0

        projDic = {}  # function length -> list of hashes
        hashFileMap = {}  # hash -> flat list [path, funcId, path, funcId, ...]

        self.listProcess.config(state="normal")
        self.listProcess.insert(
            Tkinter.END,
            "Loading source files... This may take a few minutes.")
        self.listProcess.update()

        fileList = pu.loadSource(directory)
        numFile = len(fileList)

        if numFile == 0:
            self.listProcess.insert(Tkinter.END,
                                    "Error: Failed loading source files.")
            self.listProcess.insert(
                Tkinter.END,
                "- Check if you selected proper directory, or if your project contains .c or .cpp files."
            )
            return 0

        self.listProcess.insert(Tkinter.END,
                                "Load complete. Generating hashmark...")

        if absLevel == 0:
            func = parseFile_shallow_multi
        else:
            func = parseFile_deep_multi

        # Leave one core free when more than one is available.
        cpu_count = get_cpu_count.get_cpu_count()
        if cpu_count != 1:
            cpu_count -= 1

        pool = multiprocessing.Pool(processes=cpu_count)
        try:
            for idx, tup in enumerate(pool.imap_unordered(func, fileList)):
                filePath = tup[0]

                functionInstanceList = tup[1]
                pathOnly = filePath.split(proj, 1)[1][1:]
                progress = float(idx + 1) / numFile

                self.progressbar["value"] = progress
                self.progressbar.update()
                self.listProcess.insert(Tkinter.END, "[+] " + filePath)
                self.listProcess.see("end")

                numFunc += len(functionInstanceList)

                if len(functionInstanceList) > 0:
                    # The line count of the parent file is read from its
                    # first function.
                    numLine += functionInstanceList[0].parentNumLoc

                for function in functionInstanceList:
                    function.removeListDup()
                    absBody = pu.abstract(function, absLevel)[1]
                    absBody = pu.normalize(absBody)
                    funcLen = len(absBody)

                    if funcLen > 50:
                        hashValue = md5(absBody).hexdigest()
                        projDic.setdefault(funcLen, []).append(hashValue)
                        hashFileMap.setdefault(hashValue, []).extend(
                            [pathOnly, function.funcId])
                    else:
                        numFunc -= 1  # decrement numFunc by 1 if funclen is under threshold
        finally:
            # Always shut the worker processes down. On the normal path every
            # result has already been consumed, so terminate() loses nothing.
            pool.terminate()
            pool.join()

        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(Tkinter.END,
                                "Hash index successfully generated.")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END,
                                "Saving hash index to file...")
        self.listProcess.see("end")

        # Only "directory already exists" is tolerated; other failures (e.g.
        # missing permissions) are raised instead of being swallowed.
        try:
            os.mkdir("hidx")
        except OSError:
            if not os.path.isdir("hidx"):
                raise
        packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(
            numFile) + ' ' + str(numFunc) + ' ' + str(numLine) + '\n'
        with open("hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx",
                  'w') as fp:
            fp.write(packageInfo)

            for key in sorted(projDic):
                fp.write(str(key) + '\t')
                # Each distinct hash is written once per length.
                for h in set(projDic[key]):
                    fp.write(h + '\t')
                fp.write('\n')

            fp.write('\n=====\n')

            for key in sorted(hashFileMap):
                fp.write(str(key) + '\t')
                for entry in hashFileMap[key]:
                    fp.write(str(entry) + '\t')
                fp.write('\n')

        timeOut = time.time()

        self.listProcess.insert(Tkinter.END, "Done.")
        self.listProcess.see("end")
        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(
            Tkinter.END, "Elapsed time: %.02f sec." % (timeOut - timeIn))
        self.listProcess.see("end")

        self.listProcess.insert(Tkinter.END, "Program statistics:")
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numFile) + ' files;')
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numFunc) + ' functions;')
        self.listProcess.insert(Tkinter.END,
                                " - " + str(numLine) + ' lines of code.')
        self.listProcess.see("end")

        self.listProcess.insert(Tkinter.END, "")
        self.listProcess.insert(
            Tkinter.END,
            "Hash index saved to: " + os.getcwd().replace("\\", "/") +
            "/hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx")
        self.listProcess.see("end")
        self.btnOpenFolder.config(state="normal")

        return 0