Example #1
0
    def parse_edb_cve(self, url, item, html):
        """Build an ``EdbRecord`` from an exploit page and store it.

        Args:
            url: Stored on the record as ``edb_url``.
            item: Seven-element sequence unpacked as (raw id, title, author,
                type, platform, remote port, published date).
            html: Markup handed to ``HTML(html=...)`` for the XPath lookups.
        """
        edb_html = HTML(html=html)

        raw_id, edb_title, edb_author, edb_type, edb_platform, edb_rport, edb_published = item

        edb_id = "EDB-{}".format(raw_id)
        edb_url = url
        edb_verified = get_val(edb_html.xpath(element_xpath['edb_verified']))

        # Start from the placeholder so edb_cve is always bound, including
        # the case where the page lists no CVE and nothing raises.
        edb_cve = 'N/A'
        try:
            edb_cve_num = [
                i.strip() for i in edb_html.xpath(element_xpath['edb_cve'])
            ]
            if edb_cve_num:
                edb_cve = ','.join(
                    "CVE-{}".format(cve_id) for cve_id in edb_cve_num)
                tqdm.write("Detected {} <--> {}".format(edb_id, edb_cve))
        except Exception:
            # Best effort: any failure while extracting CVEs falls back to
            # the placeholder instead of aborting the record.
            edb_cve = 'N/A'

        # The verified cell carries an 'mdi-close' marker when unverified.
        if 'mdi-close' in edb_verified:
            edb_verified = 'Unverified'
        else:
            edb_verified = 'Verified'

        edb_exploit_raw_url = 'https://www.exploit-db.com/raw/{}'.format(
            raw_id)
        edb_vulnerable_app_url = get_val(
            edb_html.xpath(element_xpath['edb_vulnerable_app_url']))

        # The extracted value is site-relative; prefix the host when present.
        if edb_vulnerable_app_url != "":
            edb_vulnerable_app_url = 'https://www.exploit-db.com' + edb_vulnerable_app_url

        edb_collect_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        exploit_record = EdbRecord(
            edb_id=edb_id,
            edb_title=edb_title,
            edb_url=edb_url,
            edb_author=edb_author,
            edb_cve=edb_cve,
            edb_type=edb_type,
            edb_platform=edb_platform,
            edb_remote_ports=edb_rport,
            edb_verified=edb_verified,
            edb_vulnerable_app_url=edb_vulnerable_app_url,
            edb_exploit_raw_url=edb_exploit_raw_url,
            edb_published=edb_published,
            edb_collect_date=edb_collect_date)
        self.insert_record(exploit_record)
Example #2
0
    def categorize(self, check_list):
        """Split exploit rows into already-mapped ones and ones to fetch.

        The EDB-to-CVE map is downloaded from ``EDB_MAP_URL``; if that fails
        for any reason it is taken from ``self.recover_map_from_db()``.

        Args:
            check_list: Iterable of eight-field rows (id, file, title,
                published, author, platform, type, remote port).

        Returns:
            A ``(parsed_list, target_url_list, target_edb_list)`` tuple:
            rows whose CVE is known from the map, page URLs still to be
            requested, and the row data matching those URLs.
        """
        logging.info("Categorizing the Exploits...")
        with_cve = []
        pending_urls = []
        pending_rows = []

        try:
            response = requests.get(EDB_MAP_URL, headers=self.headers)
            edb_cve_map = ujson.loads(response.text)
        except Exception:
            edb_cve_map = self.recover_map_from_db()

        for row in tqdm(check_list):
            (e_id, _e_file, e_title, e_published, e_author, e_platform,
             e_type, e_rport) = row
            e_url = "https://www.exploit-db.com/exploits/" + e_id
            target_id = "EDB-{}".format(e_id)

            if self.edb_dao.exist(target_id):
                # TODO: update of records existing in the DB
                continue

            if e_id not in edb_cve_map.keys():
                # CVE unknown: queue the exploit page for scraping.
                pending_urls.append(e_url)
                pending_rows.append([
                    e_id, e_title, e_author, e_type, e_platform, e_rport,
                    e_published
                ])
                continue

            try:
                e_cve = get_val(edb_cve_map[e_id])
            except KeyError:
                e_cve = 'N/A'

            with_cve.append([
                e_id, e_title, e_url, e_author, e_cve, e_type, e_platform,
                e_rport, e_published
            ])

        return with_cve, pending_urls, pending_rows
Example #3
0
    def parse_msf_module_local(self, target_file):
        """Extract metadata from a local module file and store an ``MsfRecord``.

        The file is read as text, the ``initialize ... end`` section is cut
        out with a regex, and the individual fields are pulled from that
        section with further regexes before being normalised by the
        ``self.optimize_*`` helpers.

        Args:
            target_file: Path of the module source file. The part of the path
                starting at ``"modules"`` becomes the module name.
        """
        regex_pattern = {
            'module_info':
            r"initialize[\s\S]*?end\n",
            'module_title':
            r"['|\"]Name['|\"][ |\t|\S]+['|\"|\)]",
            'module_describe':
            r"['|\"]Description['|\"][\s\S]*?['|\"|\)],\n|['|\"]Description['|\"][^\}]+},\n",
            'module_authors':
            r"['|\"]Author['|\"][^\]]+\],\n|['|\"]Author['|\"][ |\t|\S]+['|\"|\)|\]],\n",
            'module_cve':
            r"['|\"]CVE['|\"],\s['|\"]\d{4}-\d+['|\"]",
            'module_edb':
            r"['|\"]EDB['|\"],\s['|\"]\d+['|\"]",
            'module_cwe':
            r"['|\"]CWE['|\"],\s['|\"]\d+['|\"]",
            'module_bid':
            r"['|\"]BID['|\"],\s['|\"]\d+['|\"]",
            'module_zdi':
            r"['|\"]ZDI['|\"],\s['|\"]\d{2}-\d+['|\"]",
            'module_msb':
            r"['|\"]MSB['|\"],\s['|\"]MS\d{2}-\d+['|\"]",
            'module_osvdb':
            r"['|\"]OSVDB['|\"],\s['|\"]\d+['|\"]",
            'module_wpvdb':
            r"['|\"]WPVDB['|\"],\s['|\"]\d+['|\"]",
            'module_uscert':
            r"['|\"]US-CERT-VU['|\"],\s['|\"]\S+['|\"]",
            'module_packet':
            r"['|\"]PACKETSTORM['|\"],\s['|\"]\S+['|\"]",
            'module_ref_url':
            r"['|\"]URL['|\"],\s['|\"]\S+['|\"]",
            'module_platforms_fmt':
            r"['|\"]Platform['|\"][ |\t]+=>[ |\t]%+[^\}]+},\n",
            'module_platforms':
            r"['|\"]Platform['|\"][ |\t|\S]+['|\"|\)|\]],\n|['|\"]Platform['|\"][^\}]+},\n",
            'module_disclosure_date':
            r"['|\"]DisclosureDate['|\"][ |\t|\S]+['|\"],*\n",
        }

        # Read everything up front and release the handle immediately, so it
        # is closed even if one of the parsing steps below raises.
        with open(target_file, "r") as file_obj:
            source_code = file_obj.read()

        update_info_code = get_val(
            re.findall(regex_pattern['module_info'], source_code))

        def find_field(key):
            """Return get_val() of all matches of pattern ``key`` in the info section."""
            return get_val(re.findall(regex_pattern[key], update_info_code))

        module_name_start_pos = target_file.find("modules")
        module_name = target_file[module_name_start_pos:]
        module_class = module_name.split(PATH_SPLIT)[1]
        module_url = f"https://www.rapid7.com/db/modules/{module_name}".replace(
            "exploits", "exploit").replace(".rb", "")
        module_title = self.optimize_title(find_field('module_title'))
        # Collapse every run of whitespace in the description to one space.
        module_describe = ' '.join(
            self.optimize_describe(find_field('module_describe')).split())

        # TODO: Efficient author's parsing method
        # module_authors = get_val(re.findall(regex_pattern['module_authors'], update_info_code))

        module_cve = self.optimize_ref_id(find_field('module_cve'))
        module_edb = self.optimize_ref_id(find_field('module_edb'))

        # Every other reference family is normalised the same way and merged
        # into a single list, in this fixed order.
        reference_keys = (
            'module_cwe', 'module_bid', 'module_zdi', 'module_msb',
            'module_osvdb', 'module_wpvdb', 'module_uscert', 'module_packet',
        )
        module_ref_list = []
        for key in reference_keys:
            module_ref_list = module_ref_list + self.optimize_ref_id(
                find_field(key)).split(",")
        module_ref_list = module_ref_list + self.optimize_ref_url(
            find_field('module_ref_url'))
        # Families with no match contribute empty strings; drop them.
        module_ref_list = [ref for ref in module_ref_list if ref != '']
        module_references = get_val(module_ref_list)

        # Prefer the "%w{...}"-style platform declaration, fall back to the
        # plain form, and finally to an empty string when neither matches.
        try:
            module_platforms = self.optimize_platforms(
                re.findall(regex_pattern['module_platforms_fmt'],
                           update_info_code)[0])
        except IndexError:
            try:
                module_platforms = self.optimize_platforms(
                    re.findall(regex_pattern['module_platforms'],
                               update_info_code)[0])
            except IndexError:
                module_platforms = ""

        # Remote ports are looked up in the whole file, not only the info
        # section.
        module_remote_ports = self.optimize_remote_port(source_code)

        module_disclosure_date = self.optimize_disclosure_date(
            find_field('module_disclosure_date'))
        module_collect_date = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())

        msf_record = MsfRecord(module_name=module_name,
                               module_class=module_class,
                               module_title=module_title,
                               module_url=module_url,
                               module_describe=module_describe,
                               module_cve=module_cve,
                               module_edb=module_edb,
                               module_references=module_references,
                               module_platforms=module_platforms,
                               module_remote_ports=module_remote_ports,
                               module_disclosure_date=module_disclosure_date,
                               module_collect_date=module_collect_date)

        self.insert_record(msf_record)
Example #4
0
    def parse_edb_cve(self, edb_item):
        """Fetch one exploit page, build an ``EdbRecord`` from it and store it.

        If the id cannot be read from the fetched page, a placeholder record
        holding only the id is stored and the method returns without parsing
        the rest of the page.

        Args:
            edb_item: Mapping whose ``'id'`` entry is the numeric exploit id.
        """
        raw_id = edb_item['id']
        edb_id = "EDB-{}".format(raw_id)
        edb_url = "https://www.exploit-db.com/exploits/{}/".format(raw_id)

        page = self.request(edb_url)

        try:
            raw_id = page.html.xpath(
                element_xpath['edb_id'])[0].strip(':').strip()
            edb_id = "EDB-{}".format(raw_id)
        except Exception:
            logging.error("Request error, maybe record have been removed")
            # NOTE(review): the placeholder is keyed by the bare id, not the
            # "EDB-<id>" form used for full records -- confirm this is intended.
            exploit_record = EdbRecord(edb_id=raw_id)
            self.insert_record(exploit_record)
            # Nothing else can be parsed from this page; stopping here avoids
            # a second insert or an IndexError further down.
            return

        edb_title = get_val(page.html.xpath(element_xpath['edb_title']))
        edb_author = get_val(page.html.xpath(element_xpath['edb_author']))

        try:
            edb_cve_num = [
                i.strip() for i in page.html.xpath(element_xpath['edb_cve'])
            ]
            # An empty list joins to '' (no CVE listed on the page).
            edb_cve = ','.join(
                "CVE-{}".format(cve_id) for cve_id in edb_cve_num)
        except Exception:
            edb_cve = 'N/A'

        edb_type = get_val(page.html.xpath(element_xpath['edb_type']))
        edb_platform = get_val(page.html.xpath(element_xpath['edb_platform']))
        edb_verified = get_val(page.html.xpath(element_xpath['edb_verified']))

        # The verified cell carries an 'mdi-close' marker when unverified.
        if 'mdi-close' in edb_verified:
            edb_verified = 'Unverified'
        else:
            edb_verified = 'Verified'

        edb_exploit_raw_url = 'https://www.exploit-db.com/raw/{}'.format(
            raw_id)
        edb_vulnerable_app_url = get_val(
            page.html.xpath(element_xpath['edb_vulnerable_app_url']))

        # The extracted value is site-relative; prefix the host when present.
        if edb_vulnerable_app_url != "":
            edb_vulnerable_app_url = 'https://www.exploit-db.com' + edb_vulnerable_app_url

        edb_published = page.html.xpath(
            element_xpath['edb_published'])[0].strip(':').strip()
        edb_collect_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        exploit_record = EdbRecord(
            edb_id=edb_id,
            edb_title=edb_title,
            edb_url=edb_url,
            edb_author=edb_author,
            edb_cve=edb_cve,
            edb_type=edb_type,
            edb_platform=edb_platform,
            edb_remote_ports="",
            edb_verified=edb_verified,
            edb_vulnerable_app_url=edb_vulnerable_app_url,
            edb_exploit_raw_url=edb_exploit_raw_url,
            edb_published=edb_published,
            edb_collect_date=edb_collect_date)
        self.insert_record(exploit_record)
Example #5
0
    def parse_msf_module(self, item):
        """Fetch a module page from rapid7.com, build an ``MsfRecord`` and store it.

        When the response status is not 200, a placeholder record named after
        ``item[11:]`` is added through ``self.msf_dao`` and the method returns
        without parsing the page.

        Args:
            item: Site-relative path of the module page; it is appended to
                ``https://www.rapid7.com``.
        """
        url = "https://www.rapid7.com{}".format(item)
        page = self.request(url)

        if page.status_code != 200:
            msf_record = MsfRecord(module_name=item[11:])
            self.msf_dao.add(msf_record)
            # An error page has none of the expected elements; stop here
            # rather than parse it and attempt a second insert.
            return

        element_xpath = {
            'module_title':
            '//div[@class="vulndb__detail-main"]/h3/text()',
            'module_url':
            '/html/head/link[@rel="canonical"]/@href',
            'module_devlink':
            '//section[contains(@class,"vulndb__solution")]/ul/li[1]/a/@href',
            'module_describe':
            '//div[contains(@class,"vulndb__detail-content")]/p/text()',
            'module_authors':
            '//div[contains(@class,"vulndb__detail-content")]/ul/li/text()',
            'module_references':
            '//section[contains(@class,"vulndb__references")]/ul/li//text()',
            'module_platforms':
            '//div[contains(@class,"vulndb__detail-content")]/p[2]/text()',
            'module_architectures':
            '//div[contains(@class,"vulndb__detail-content")]/p[3]/text()',
        }

        module_url = get_val(page.html.xpath(element_xpath["module_url"]))
        module_devlink = get_val(
            page.html.xpath(element_xpath["module_devlink"]))
        # Drops a fixed 60-character prefix from the development link.
        # NOTE(review): presumably the repository URL prefix -- confirm.
        module_name = module_devlink[60:]
        module_title = get_val(page.html.xpath(element_xpath["module_title"]))
        # Collapse every run of whitespace in the first paragraph to one space.
        module_describe_words = page.html.xpath(
            element_xpath["module_describe"])[0].split()
        module_describe = ' '.join(module_describe_words)

        module_authors = get_val(
            page.html.xpath(element_xpath["module_authors"]))

        module_references = get_val(
            page.html.xpath(element_xpath["module_references"]))
        module_cve = ""
        module_edb = ""

        # Extracting CVEs&EDBs from reference information
        if module_references is not None:
            pattern = r"CVE-\d{4}-\d+|EDB-\d+"
            numbering_list = re.findall(pattern, module_references)
            # Strip the identifiers (and a trailing comma) from the reference
            # text, since they are stored in their own fields.
            exclusion_pattern = r"CVE-\d{4}-\d+,?|EDB-\d+,?"
            module_references = re.sub(exclusion_pattern, "",
                                       module_references)

            # Every match starts with either "CVE-" or "EDB-".
            cve_list = [ref for ref in numbering_list if ref.startswith("CVE")]
            edb_list = [ref for ref in numbering_list if ref.startswith("EDB")]

            if cve_list:
                module_cve = ','.join(cve_list)
            if edb_list:
                module_edb = ','.join(edb_list)

        module_platforms = get_val(
            page.html.xpath(element_xpath["module_platforms"]))
        module_architectures = get_val(
            page.html.xpath(element_xpath["module_architectures"]))

        modified_date = self.get_modified_date(module_name)
        module_update_date = parser.parse(modified_date).strftime(
            "%Y-%m-%d %H:%M:%S")
        module_collect_date = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())

        msf_record = MsfRecord(module_name=module_name,
                               module_title=module_title,
                               module_url=module_url,
                               module_describe=module_describe,
                               module_authors=module_authors,
                               module_cve=module_cve,
                               module_edb=module_edb,
                               module_references=module_references,
                               module_platforms=module_platforms,
                               module_architectures=module_architectures,
                               module_update_date=module_update_date,
                               module_collect_date=module_collect_date)

        self.insert_record(msf_record)