Ejemplo n.º 1
0
def process_results():
    """Report CSV statistics for the result folders under ``./api_results``.

    For every folder returned by ``get_first_layer_folders`` the CSV files
    listed by ``get_first_layer_files(..., html=False)`` are read, and the
    lines containing a comma are counted. A folder whose files contain no
    such line at all is reported with a ``!!!=`` marker. Finally the total
    number of files seen across all folders is printed.

    Returns:
        None. All results are written to stdout.
    """
    result_folders = get_first_layer_folders("." + os.sep + "api_results")
    csv_file_cnt = 0
    for res_folder in result_folders:
        csv_files = get_first_layer_files(res_folder, html=False)
        csv_file_cnt += len(csv_files)
        api_sum = 0
        for csv_file in csv_files:
            # The context manager guarantees the handle is released even when
            # decoding fails part-way through the file.
            with open(csv_file, "r", encoding="utf-8") as handle:
                api_sum += sum(1 for line in handle if "," in line)
        if api_sum == 0:
            # No comma-separated row in any file of this folder.
            print("!!!=" + res_folder)
    print("CSV File Count=" + str(csv_file_cnt))
Ejemplo n.º 2
0
 def run(self):
     """Process every API folder under the hard-coded docs root and print a summary.

     Loads the sensitive keywords, re-wraps ``sys.stdout`` as UTF-8, feeds
     each entry returned by ``get_first_layer_files`` to ``process_api``,
     and accumulates the four values it returns. Only the last of them
     (the "acs" total) is printed; afterwards every collected sensitive
     result is printed and their count is logged.
     """
     get_sensitive_keywords()
     print(self.sensitive_keywords)
     # From here on everything written to stdout is encoded as UTF-8.
     sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
     docs_root = "E:\\Lab Work\\dataset_science\\API_Docs\\Countly_API_Docs_test\\"
     total_items = 0
     total_tp = 0
     total_fp = 0
     total_acs = 0
     for api_folder in get_first_layer_files(docs_root):
         logger.info("Processing Folder:" + str(api_folder))
         folder_items, folder_tp, folder_fp, folder_acs = self.process_api(api_folder)
         total_acs += folder_acs
         total_items += folder_items
         total_tp += folder_tp
         total_fp += folder_fp
     print("SUM=" + str(total_acs))
     print("-----Sensitive Results-----")
     for entry in self.sensitive_results:
         print(entry)
     sensitive_total = len(self.sensitive_results)
     logger.info("Sensitive API Count=" + str(sensitive_total))
Ejemplo n.º 3
0
 def run(self):
     """Process each entry under ``Config.target_folder`` and print the totals.

     Loads the sensitive keywords, hands every entry returned by
     ``get_first_layer_files`` to ``process_api``, then prints how many
     APIs and how many sensitive results have been collected on ``self``.
     """
     get_sensitive_keywords()
     for folder_path in get_first_layer_files(Config.target_folder):
         logger.info("Processing Folder:{}".format(folder_path))
         self.process_api(folder_path)
     print("-----Sensitive Results-----")
     print("APIs Count={}".format(len(self.apis)))
     print("Sensitive APIs Count={}".format(len(self.sensitive_results)))
Ejemplo n.º 4
0
def parse_dex_folder(target_folder):
    """Run ``DexFileParser`` on each file directly inside ``target_folder``.

    Processing is best-effort: an exception raised while constructing or
    running the parser for one file is printed, and the loop moves on to
    the next file.

    Args:
        target_folder: Path of the folder whose first-layer files are parsed.
    """
    logger.info("Dex Folder=" + target_folder)
    for dex_path in get_first_layer_files(target_folder, False):
        try:
            dex_parser = DexFileParser(dex_path)
            dex_parser.run()
            dex_parser.print_results()
        except Exception as error:
            # Deliberately swallow per-file failures so one bad file does not
            # abort the whole folder.
            print(error)
Ejemplo n.º 5
0
 def process_api(self, folder):
     """Parse every HTML file in ``folder`` and total the privacy hits.

     For each file, ``self.processing_class`` is set to the part of the file
     name before its first space, the document is parsed with BeautifulSoup,
     and all of its tags are passed to ``self.get_privacy``.

     Args:
         folder: Path of the folder whose first-layer files are processed.

     Returns:
         A ``(tp, fp)`` tuple: the sums of the two values returned by
         ``self.get_privacy`` over all files.
     """
     tp = 0
     fp = 0
     for file in get_first_layer_files(folder):
         # NOTE: the file name is extracted by splitting on a backslash, so
         # this assumes Windows-style paths.
         self.processing_class = file.split("\\")[-1].split(" ")[0]
         # BeautifulSoup reads the whole handle while constructing the soup,
         # so the file can be closed as soon as the constructor returns.
         with open(file, encoding='utf-8') as handle:
             soup = BeautifulSoup(handle, features='html.parser')
         c_tp, c_fp = self.get_privacy(soup.find_all())
         tp += c_tp
         fp += c_fp
     return tp, fp
Ejemplo n.º 6
0
def process_javadoc_package_folder(target_folder):
    """Unpack the javadoc jars in ``target_folder`` and collect their class pages.

    Every first-layer file whose path contains ``.jar`` is extracted into a
    sibling folder named after the archive. The extracted HTML files, minus
    javadoc navigation pages (index, overview, package summaries, ...), are
    copied flat into ``<target_folder>/new_sdks/<sdk>_new/All``. Finally
    ``parse_historical_javadoc_folder_`` is called with the last path
    component of ``target_folder`` and the ``new_sdks`` path.

    Args:
        target_folder: Folder that directly contains the javadoc ``.jar``
            archives.
    """
    # File-name prefixes of javadoc pages that do not describe a single class.
    black_list = (
        "allclasses", "constant-values", "deprecated-list", "help-doc",
        "index", "overview-", "package-"
    )
    new_folder_path = target_folder + os.sep + "new_sdks"
    for file in get_first_layer_files(target_folder, html=False):
        if ".jar" not in file:
            continue
        # Strip the four-character ".jar" suffix to obtain the SDK name.
        sdk_name = file.split(os.sep)[-1][:-4]
        folder_path = file[0:file.rfind(os.sep)]
        extract_path = folder_path + os.sep + sdk_name
        # Close the archive as soon as extraction is done.
        with zipfile.ZipFile(file, "r") as zip_file:
            zip_file.extractall(extract_path)
        html_list = [
            html_file
            for html_file in get_all_files(extract_path, html=True)
            if not html_file.split(os.sep)[-1].startswith(black_list)
        ]
        new_sdk_folder = new_folder_path + os.sep + sdk_name + "_new"
        next_folder = new_sdk_folder + os.sep + "All"
        # Creates new_sdks/, <sdk>_new/ and All/ in one call if missing.
        os.makedirs(next_folder, exist_ok=True)
        for html_file in html_list:
            html_name = html_file.split(os.sep)[-1]
            copyfile(html_file, next_folder + os.sep + html_name)
    parse_historical_javadoc_folder_(
        new_folder_path.split(os.sep)[-2], new_folder_path)
Ejemplo n.º 7
0
 def process_api(self, folder):
     """Parse every HTML file in ``folder`` and total the privacy hits.

     Each file is first read as GB18030; if anything goes wrong while
     opening or parsing it that way, it is re-read as UTF-8. For each file,
     ``self.processing_class`` is set to the part of the file name before
     its first space, and all parsed tags are passed to ``self.get_privacy``.

     Args:
         folder: Path of the folder whose first-layer files are processed.

     Returns:
         A ``(tp, fp)`` tuple: the sums of the two values returned by
         ``self.get_privacy`` over all files.
     """
     tp = 0
     fp = 0
     for file in get_first_layer_files(folder):
         # NOTE: the file name is extracted by splitting on a backslash, so
         # this assumes Windows-style paths.
         self.processing_class = file.split("\\")[-1].split(" ")[0]
         # We should consider the full class name here.
         # BeautifulSoup reads the whole handle while constructing the soup,
         # so each handle can be closed right after the constructor returns.
         try:
             with open(file, encoding="gb18030") as handle:
                 soup = BeautifulSoup(handle, features='html.parser')
         except Exception:
             # Best-effort fallback kept as broad as the original.
             with open(file, encoding="utf-8") as handle:
                 soup = BeautifulSoup(handle, features='html.parser')
         c_tp, c_fp = self.get_privacy(soup.find_all())
         tp += c_tp
         fp += c_fp
     return tp, fp
Ejemplo n.º 8
0
def parse_jar_folder(target_folder):
    """Run ``DexFileParser`` on every ``.jar`` file directly inside ``target_folder``.

    Files without a ``.jar`` suffix are skipped. Processing is best-effort:
    if constructing or running the parser for one jar raises, the jar name
    and the exception are printed and the loop continues with the next file.

    Args:
        target_folder: Path of the folder whose first-layer files are scanned.
    """
    for jar_file in get_first_layer_files(target_folder, False):
        if not jar_file.endswith(".jar"):
            continue
        jar_name = jar_file.split(os.sep)[-1]
        print(jar_name)
        # Drop the ".jar" suffix. The previous ``jar_name[-4]`` picked out the
        # single character "." instead of the SDK name.
        sdk_name = jar_name[:-4]
        try:
            parser = DexFileParser(sdk_name, jar_file)
            parser.run()
            parser.print_results()
            parser.print_to_csv()
        except Exception as e:
            # Deliberately swallow per-jar failures so one bad archive does
            # not abort the whole folder.
            print(jar_name + " meets exception!")
            print(e)