def get_privacy(self, tag_list):
        """Collect documented methods from a flat tag list and flag sensitive ones.

        An ``h2`` tag sets the current class name (last space-separated word of
        its text) and the package name (text of the tag just before it). An
        ``h3`` tag whose text contains one of the method-section markers starts
        a scan of all following tags: every ``h4`` immediately followed by a
        ``pre`` is recorded as an API, with the text of the tag after the
        ``pre`` as its description when that tag is a ``div``.

        Each recorded name is appended to ``self.apis``. Names flagged by
        ``check_api_by_class`` are appended to ``self.sensitive_apis`` as
        ``(package.class, api, privacy_item, description)``.

        Returns:
            A ``(tp, fp)`` tuple: ``tp`` counts recorded APIs that were not
            flagged, ``fp`` counts those that were.
        """
        section_markers = ("Public Member Functions", "Method Detail", "方法详细资料", "メソッドの詳細")
        total = len(tag_list)
        records = []  # (api name, description) in document order
        signatures = {}
        pkg_name = ""
        classname = ""

        for idx, tag in enumerate(tag_list):
            if tag.name == 'h2':
                pkg_name = tag_list[idx - 1].getText()
                classname = tag.getText().split(" ")[-1]
            if tag.name != 'h3':
                continue
            heading = tag.getText()
            if not any(marker in heading for marker in section_markers):
                continue
            # Scan everything after the section heading, up to the end of the list.
            for pos in range(idx + 1, total):
                candidate = tag_list[pos]
                if candidate.name != 'h4':
                    continue
                if pos + 1 >= total:
                    break
                api_name = candidate.getText()
                following = tag_list[pos + 1]
                if following.name != "pre":
                    # An h4 without a signature block is not treated as a method.
                    continue
                signature = following.getText()
                self.apis.append(api_name)
                signatures[api_name] = signature
                if pos + 2 < total and tag_list[pos + 2].name == "div":
                    records.append((api_name, tag_list[pos + 2].getText()))
                else:
                    records.append((api_name, ""))

        tp = 0
        fp = 0
        for api_name, description in records:
            is_sensitive, privacy_item = check_api_by_class(classname, api_name)
            if not is_sensitive:
                tp += 1
                continue
            flattened = description.replace("\n", " ")
            self.sensitive_apis.append((pkg_name + "." + classname, api_name, privacy_item, flattened))
            if pkg_name == "":
                logger.error(classname)
            fp += 1
        return tp, fp
 def get_privacy(self, tag_list):
     """Collect the "Public Methods" of a class page and flag sensitive ones.

     The first ``table`` tag carrying the ``jd-inheritance-table`` class
     supplies ``self.processing_class`` (text of its last ``td``). After
     every ``h2`` whose text contains "Public Methods", each following
     ``span`` with the ``sympad`` class is recorded as an API name. Its
     description is the text of the tag five positions later when that tag
     is a ``div`` with the ``jd-tagdescr`` class, otherwise the empty string.

     Every recorded name is added to ``self.apis``. Names flagged by
     ``check_api_by_class`` are appended to ``self.sensitive_apis`` as
     ``(processing_class, api, privacy_item, description)``.

     Returns:
         A ``(tp, fp)`` tuple: ``tp`` counts recorded APIs that were not
         flagged, ``fp`` counts those that were.
     """
     # Distance, in tags, from a method-name span to its description block.
     description_offset = 5
     tag_count = len(tag_list)

     def has_css_class(tag, css_class):
         return 'class' in tag.attrs and css_class in tag.attrs['class']

     for tag in tag_list:
         if tag.name == 'table' and has_css_class(tag, 'jd-inheritance-table'):
             self.processing_class = tag.find_all("td")[-1].getText()
             break

     api_names = []
     api_descriptions = []
     for i, tag in enumerate(tag_list):
         if tag.name != 'h2' or "Public Methods" not in tag.getText():
             continue
         for j in range(i + 1, tag_count):
             name_tag = tag_list[j]
             if not (name_tag.name == 'span' and has_css_class(name_tag, 'sympad')):
                 continue
             if j + 1 >= tag_count:
                 break
             api_names.append(name_tag.getText())
             description = ""
             # Methods near the end of the page may have fewer than
             # ``description_offset`` tags after them; treat that as "no
             # description" instead of indexing past the end of the list.
             if j + description_offset < tag_count:
                 candidate = tag_list[j + description_offset]
                 if candidate.name == "div" and has_css_class(candidate, 'jd-tagdescr'):
                     description = candidate.getText()
             api_descriptions.append(description)

     tp = 0
     fp = 0
     for api, api_description in zip(api_names, api_descriptions):
         self.apis.add(api)
         is_sensitive, privacy_item = check_api_by_class(
             self.processing_class, api)
         if is_sensitive:
             api_description = api_description.replace("\n", " ")
             self.sensitive_apis.append((self.processing_class, api,
                                         privacy_item, api_description))
             fp += 1
         else:
             tp += 1
     return tp, fp
 def get_privacy(self, tag_list):
     """Collect methods from a "Method Summary" table and flag sensitive ones.

     The class name is the text of the last ``li`` tag containing
     "com.facebook". For each ``span`` whose text contains "Method Summary",
     the next tag's ``tr`` rows (skipping the first) are scanned and the
     part before "(" of every ``td`` containing "(" is appended to
     ``self.apis``. Once any tag's text contains "Method Detail", each
     ``h4`` whose text is in ``self.apis`` gets a description: the longer
     of the texts of the tags two and three positions after it.

     Names flagged by ``check_api_by_class`` are appended to
     ``self.sensitive_apis``.

     Returns:
         A ``(tp, fp)`` tuple: ``tp`` counts described APIs that were not
         flagged, ``fp`` counts those that were.
     """
     full_class_name = ""
     for tag in tag_list:
         if tag.name == "li" and "com.facebook" in tag.getText():
             full_class_name = tag.getText()

     descriptions = {}
     in_method_detail = False
     for idx, tag in enumerate(tag_list):
         if tag.name == "span" and "Method Summary" in tag.getText():
             # The tag right after the heading span holds the summary table.
             rows = [node for node in tag_list[idx + 1].find_all()
                     if node.name == "tr"]
             for row in rows[1:]:  # the first row is skipped
                 for cell in row.find_all():
                     if cell.name != "td":
                         continue
                     cell_text = cell.getText()
                     paren = cell_text.find("(")
                     if paren != -1:
                         self.apis.append(cell_text[:paren])
         if "Method Detail" in tag.getText():
             in_method_detail = True
         if not (in_method_detail and tag.name == "h4"):
             continue
         name = tag.getText()
         if name not in self.apis:
             continue
         near_text = tag_list[idx + 2].getText()
         far_text = tag_list[idx + 3].getText()
         descriptions[name] = far_text if len(far_text) > len(near_text) else near_text

     tp = 0
     fp = 0
     for api_name, description in descriptions.items():
         is_sensitive, privacy_item = check_api_by_class(
             full_class_name, api_name)
         if not is_sensitive:
             tp += 1
             continue
         flattened = description.replace("\n", " ")
         # NOTE(review): description precedes privacy_item in this tuple;
         # confirm that consumers of sensitive_apis expect this order.
         self.sensitive_apis.append(
             (full_class_name, api_name, flattened, privacy_item))
         fp += 1
     return tp, fp
Exemple #4
0
 def get_privacy(self, tag_list):
     """Collect methods listed under "Methods" headings and flag sensitive ones.

     After every ``h2`` whose text contains "Methods", each following ``dl``
     tag with the ``method`` class is treated as a method entry. The ``id``
     attribute of the tag right after it is taken as the signature, and the
     API name is the last dotted component of the part before the first "(".

     Each name is appended to ``self.apis``. The distinct names are checked
     with ``check_api_by_class`` against ``self.processing_class``, and
     flagged ones are appended to ``self.sensitive_apis`` as
     ``(processing_class, api, privacy_item)``.

     Returns:
         A ``(tp, fp)`` tuple: ``tp`` counts distinct APIs that were not
         flagged, ``fp`` counts those that were.
     """
     tag_count = len(tag_list)
     api_names = set()
     for i, tag in enumerate(tag_list):
         if tag.name != 'h2' or "Methods" not in tag.getText():
             continue
         for j in range(i + 1, tag_count):
             entry = tag_list[j]
             if entry.name != "dl":
                 continue
             if "class" not in entry.attrs or "method" not in entry.attrs["class"]:
                 continue
             if j + 1 >= tag_count:
                 break
             signature = tag_list[j + 1].get("id")
             if not signature:
                 # No id to derive a name from; skip this entry.
                 continue
             # Cut the parameter list off first: splitting on "." before
             # that would pick up dotted parameter types, and slicing at a
             # missing "(" would silently drop the last character.
             qualified_name = signature.partition("(")[0]
             api_name = qualified_name.split(".")[-1]
             api_names.add(api_name)
             self.apis.append(api_name)

     tp = 0
     fp = 0
     for api in api_names:
         is_sensitive, privacy_item = check_api_by_class(
             self.processing_class, api)
         if is_sensitive:
             self.sensitive_apis.append(
                 (self.processing_class, api, privacy_item))
             fp += 1
         else:
             tp += 1
     return tp, fp
Exemple #5
0
 def run(self):
     """Register every eligible method and record the sensitive ones.

     Methods are skipped when their class's ``package_name`` contains one of
     the platform package fragments, or when the method name is one of the
     ignored names. Every remaining method name is appended to
     ``self.apis``; those flagged by ``check_api_by_class`` are appended to
     ``self.sensitive_apis`` as ``[class fullname, method name, privacy_item]``.
     """
     platform_fragments = ("javax/", "java/", "android/", "org/w3c")
     ignored_names = ("<init>", "<clinit>", "toString", "clone")
     for method in self.methods:
         package = method.cls.package_name
         if any(fragment in package for fragment in platform_fragments):
             continue
         if method.name in ignored_names:
             continue
         # No filtering of obfuscated (e.g. single-letter) names is applied.
         self.apis.append(method.name)
         owner = method.cls.fullname
         is_sensitive, privacy_item = check_api_by_class(owner, method.name)
         if is_sensitive:
             self.sensitive_apis.append([owner, method.name, privacy_item])
    def get_privacy(self, tag_list):
        """Collect documented member functions and flag sensitive ones.

        ``self.processing_class`` is first rewritten in place: its first and
        last five characters are removed and every ``_1_1`` becomes ``.``.
        The class path is then decoded from that value by dropping each
        ``_`` and upper-casing the character that follows it; the last dotted
        component is the class name and the rest is the package name.

        After every ``h2`` whose text contains "Member Function
        Documentation", each following ``td`` tag with the ``memname`` class
        yields an API name: the last space-separated token of its stripped
        text, reduced to the part after the final "." if it is dotted.

        Each name is appended to ``self.apis``. Names flagged by
        ``check_api_by_class`` are appended to ``self.sensitive_apis`` as
        ``(package.class, api, privacy_item)``.

        Returns:
            A ``(tp, fp)`` tuple: ``tp`` counts collected APIs that were not
            flagged, ``fp`` counts those that were.
        """
        encoded = self.processing_class[5:-5].replace("_1_1", ".")
        self.processing_class = encoded

        # Decode "_x" escapes into "X".
        decoded = []
        upper_next = False
        for ch in encoded:
            if upper_next:
                decoded.append(ch.upper())
                upper_next = False
            elif ch == "_":
                upper_next = True
            else:
                decoded.append(ch)
        class_info = "".join(decoded).strip()
        classname = class_info.split(".")[-1]
        pkg_name = class_info[:-(len(classname) + 1)]

        api_names = []
        for idx, tag in enumerate(tag_list):
            if tag.name != 'h2':
                continue
            if "Member Function Documentation" not in tag.getText():
                continue
            for pos in range(idx + 1, len(tag_list)):
                cell = tag_list[pos]
                if cell.name != "td":
                    continue
                if "class" not in cell.attrs.keys() or "memname" not in cell.attrs["class"]:
                    continue
                api_name = str(cell.getText()).strip().split(" ")[-1]
                if "." in api_name:
                    api_name = api_name.split(".")[-1]
                api_names.append(api_name)
                self.apis.append(api_name)

        tp = 0
        fp = 0
        for api_name in api_names:
            is_sensitive, privacy_item = check_api_by_class(classname, api_name)
            if not is_sensitive:
                tp += 1
                continue
            self.sensitive_apis.append((pkg_name + "." + classname, api_name, privacy_item))
            if pkg_name == "":
                logger.error(classname + " no Class!")
            fp += 1
        return tp, fp