def construct_cpe_model_for_cve(cve: CVE, cve_times: list):
    timer = ChronoTimer()
    cve.configurations = []  # Workaround for a bug

    semi_model, running_conf = scraper.get_CPEs(cve)

    print('Finding direct exploits for {}'.format(cve.cve_id))
    timer.start_exploit_scraping()
    direct_exploits = scraper.get_exploits_for_CVE(cve)

    # Flatten the vendor -> product -> CPE semi-model into a single list
    cpesToCheck = []
    for v in semi_model:
        for p in semi_model[v]:
            for cpe in semi_model[v][p]:
                cpesToCheck.append(cpe)

    print('Finding indirect exploits for {}. Checking {} CPEs'.format(
        cve.cve_id, len(cpesToCheck)))

    # Fan the per-CPE exploit lookups out over a thread pool and collect
    # the results keyed by CPE
    with ThreadPoolExecutor(max_workers=50) as pool:
        cveExploitDict = dict()
        futures = {cpe: pool.submit(scraper.get_exploits_for_CPE, cpe, cve, cveExploitDict)
                   for cpe in cpesToCheck}
        indirect_exploits = {cpe: future.result() for cpe, future in futures.items()}
    timer.stop_exploit_scraping()

    generate_tree(cve, semi_model, running_conf, direct_exploits, indirect_exploits, timer)
    print('Wrote tree for ' + cve.cve_id)

    cve_times.append((cve.cve_id,
                      '%.4f' % timer.get_exploit_scraping_time(),
                      '%.4f' % timer.get_tree_build_time(),
                      '%.4f' % timer.get_constraints_time()))
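# Sketch of the ChronoTimer interface that construct_cpe_model_for_cve relies
# on. This is an assumption-driven reconstruction, not the project's actual
# implementation: it assumes plain wall-clock timing via time.perf_counter(),
# and that the tree-build and constraints phases are started/stopped elsewhere
# (e.g. inside generate_tree).
import time


class ChronoTimer:
    def __init__(self):
        self._elapsed = {}  # phase name -> elapsed seconds
        self._started = {}  # phase name -> perf_counter() value at start

    def _start(self, phase: str):
        self._started[phase] = time.perf_counter()

    def _stop(self, phase: str):
        self._elapsed[phase] = time.perf_counter() - self._started.pop(phase)

    def start_exploit_scraping(self):
        self._start('exploit_scraping')

    def stop_exploit_scraping(self):
        self._stop('exploit_scraping')

    def get_exploit_scraping_time(self) -> float:
        return self._elapsed.get('exploit_scraping', 0.0)

    # Hypothetical accessors for the other phases reported in cve_times
    def get_tree_build_time(self) -> float:
        return self._elapsed.get('tree_build', 0.0)

    def get_constraints_time(self) -> float:
        return self._elapsed.get('constraints', 0.0)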
def get_CVEs(self, keyword: str, exact_match: bool = False) -> list:
    '''
    Searches VulDB in order to fetch vulnerabilities related to the keyword.

    :param keyword: Query that will be performed against the VulDB server
    :param exact_match: Whether results must match the keyword exactly
    '''
    # First, we need to check the arguments
    if not keyword or type(keyword) is not str:
        raise ValueError("keyword must be a non-empty string")

    # Retry until we get past CSRF and rate-limiting errors
    while True:
        searchPayload = {'search': keyword, 'csrftoken': self.csrftoken}
        searchResponse = requests.post(VuldbScraper.VULDB_SEARCH_URI, searchPayload,
                                       cookies=self.cookie)
        if 'CSRF token invalid' in searchResponse.text:
            print("VulDB CSRF token invalid, regenerating")
            self.__init__()  # Regenerate cookie and CSRF token; the loop rebuilds the payload
        elif 'You have been using too many search requests lately' in searchResponse.text:
            print("[WARN] VulDB CVE search rate limited. Retrying. Try again later or disable VulDB scraping")
            time.sleep(5)
        elif "DDoS Protection Message" in searchResponse.text:
            print("[WARN] VulDB rate limit exceeded for a few minutes. Retrying in 10 seconds")
            time.sleep(10)
        else:
            break

    soup = BeautifulSoup(searchResponse.text, "html.parser")
    tableEntries = soup.select_one('table').findChildren("tr", recursive=False)
    vulnerabilities = []
    for entry in tableEntries:
        tableCell = entry.select_one('td:nth-child(4)')
        if tableCell:  # Skip the table header row
            titleConfiguration = tableCell.get('title')
            entryIdElem = tableCell.select_one('a')
            if entryIdElem:  # Skip rows without a vulnerability link
                entryVulName = entryIdElem.text
                # If exact_match is set, skip entries that do not contain the keyword verbatim
                if exact_match and keyword not in entryVulName:
                    continue
                entryId = entryIdElem.get('href')[4:]
                entryCVE = entry.select_one('a[target="cve"]').text.strip()
                vulnerabilities.append(CVE(entryCVE, sources=["vuldb"], vul_name=entryVulName,
                                           vuldb_id=entryId, configurations=[titleConfiguration]))
    return vulnerabilities
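# Hypothetical usage sketch for the VulDB scraper above. It assumes that
# VuldbScraper() performs the handshake that populates self.cookie and
# self.csrftoken (implied by the CSRF-regeneration branch, but not shown
# here), and that CVE exposes cve_id and vul_name attributes as used elsewhere.
if __name__ == '__main__':
    scraper = VuldbScraper()
    for vuln in scraper.get_CVEs('openssl', exact_match=False):
        print(vuln.cve_id, '-', vuln.vul_name)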
def get_search_results(keyword: str):
    '''
    Tries to retrieve search results from the local vulnerability index.
    Returns a list of CVEs (empty if no record matches).
    '''
    # CVE identifiers are tokenized differently, so "CVE-2021-1234" becomes "2021 1234"
    if "CVE-" in keyword:
        keyword = keyword.replace("CVE-", "").replace("-", " ")

    # Sanitize characters that RediSearch treats as query syntax; restoring
    # 'pp22ss33' to 'pp22*' keeps "1.*" version-string wildcards working
    keyword = keyword.replace(':', 'cc11').replace('.', 'pp22') \
                     .replace('*', 'ss33').replace('pp22ss33', 'pp22*')

    query = Query(keyword).paging(0, 1000000)
    # The result holds the total number of matches and a list of documents
    res = client.search(query)

    for doc in res.docs:
        # Hack to rebuild valid JSON from the stored configuration string:
        # restore quotes and booleans, undo the character escaping, and
        # normalize backslashes via the temporary '/bck' marker
        sanitized = doc.configurations \
            .replace("'", '"') \
            .replace("True", "true") \
            .replace("False", "false") \
            .replace('cc11', ':').replace('pp22', '.').replace('ss33', '*') \
            .replace('\\\\', '/bck') \
            .replace('/bck"', '') \
            .replace('/bck', '\\\\')
        doc.configurations = jsonpickle.decode(sanitized)
        # Undo the escaping applied to the description at indexing time
        doc.description = doc.description.replace('cc11', ':').replace('pp22', '.') \
                                         .replace('ss33', '*').replace('-', '_')

    return [
        CVE(doc.id.replace('cve:', ''), vul_description=doc.description,
            sources=['nvd'], cpeConfigurations=doc.configurations)
        for doc in res.docs
    ]
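# The replace chains above imply a matching escape step when documents are
# indexed. A minimal sketch of that step, assuming the same sentinel tokens
# (escape_for_redisearch is a hypothetical name; the real indexer may differ):
def escape_for_redisearch(text: str) -> str:
    # ':', '.' and '*' are RediSearch query-syntax characters, so they are
    # swapped for sentinel tokens before being stored or queried
    return text.replace(':', 'cc11').replace('.', 'pp22').replace('*', 'ss33')


# Example: escape_for_redisearch('cpe:2.3:a:openssl')
# returns 'cpecc112pp223cc11acc11openssl'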
def get_CVEs(self, keyword: str, page_num: int = 0, exact_match: bool = False) -> list:
    '''
    Searches NVD in order to fetch vulnerabilities related to the keyword.

    :param keyword: Query that will be performed against the NVD server
    :param page_num: Number of the page from which to extract CVEs. A query may
                     produce more than one page of results. By default, page size is 20.
    :param exact_match: Whether the user wants to perform a search using exact keyword match
    '''
    # First, we need to check the arguments
    if not keyword or type(keyword) is not str:
        raise ValueError("keyword must be a non-empty string")
    if type(page_num) is not int or page_num < 0:
        raise ValueError("page_num must be a non-negative integer")

    res = list()
    startIndex = page_num * 20
    query_url = NvdScraper.VULN_QUERY_URI
    # Ask NVD for exact phrase matching if requested
    if exact_match:
        query_url += "&queryType=phrase"

    # In some Python environments it is mandatory to provide an SSL context
    # when accessing HTTPS sites.
    # TODO: Change this to use the OS's built-in CAs (pip certifi)
    context = ssl._create_unverified_context()

    # Send an HTTPS request to NVD, retrying on failure, and build a
    # BeautifulSoup object to analyse the page
    while True:
        req = Request(query_url.format(urllib.parse.quote(keyword), startIndex))
        try:
            res_page = urlopen(req, context=context)
            break
        except Exception:
            print("[WARN] NVD request failed for keyword {}. Possible rate limiting. Retrying".format(keyword))
            time.sleep(5)
    soup = BeautifulSoup(res_page, "html.parser")

    # All CVEs are wrapped in a table (the only one in the HTML) with the attribute
    # data-testid="vuln-results-table". Inside this table, they sit in <tr> tags.
    vulns_table = soup.find("table", {"data-testid": "vuln-results-table"})
    # Had the table been found (= results were found), we extract the CVEs
    if vulns_table:
        vulns = vulns_table.find_all("tr", {"data-testid": re.compile("^vuln-row-")})
        res.append(map(lambda v: CVE(v.th.strong.a.text.strip(), sources=["nvd"],
                                     vul_description=v.td.p.text), vulns))
    else:
        print("No results were found in NVD database")

    # Pagination is currently disabled; to re-enable it, parse the match count:
    # matchingRecords = int(soup.find("strong", {"data-testid": "vuln-matching-records-count"}).text.replace(',', ''))
    matchingRecords = 0
    if matchingRecords > 20 and page_num == 0:
        print("Found " + str(matchingRecords) + " results on NVD. Paginating")
        # Fetch the remaining pages concurrently
        with ThreadPoolExecutor(max_workers=50) as pool:
            futures = [pool.submit(self.get_CVEs, keyword, exact_match=exact_match, page_num=i)
                       for i in range(1, math.ceil(matchingRecords / 20))]
            results = [x.result() for x in as_completed(futures)]
        totalList = []
        for page in results:
            totalList.extend(page)
        return totalList
    else:
        # Guard against the no-results case, where res is empty
        return list(res[0]) if res else []
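# Hypothetical usage sketch for the NVD scraper above. It assumes that
# NvdScraper.VULN_QUERY_URI is a format string with slots for the quoted
# keyword and the start index, as implied by the query_url.format(...) call,
# and that CVE exposes cve_id and vul_description attributes as used elsewhere.
if __name__ == '__main__':
    nvd = NvdScraper()
    for cve in nvd.get_CVEs('apache struts', exact_match=True):
        print(cve.cve_id, '-', cve.vul_description)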