def parse_mysql_advisory(url):
    """Parse an Oracle Critical Patch Update (CPU) risk matrix into MySQL flaws.

    The url passed in can be either the main CPU page, or the "Text Form of
    Risk Matrices" (aka verbose) page; the verbose page is always the one
    actually fetched and parsed.

    Returns a ``(flaws, warnings)`` tuple: a list of Flaw objects and a list
    of human-readable messages about rows that could not be fully parsed.

    Raises AdvisoryParserTextException if the URL does not look like a CPU URL.
    """
    # Parse url first to get base url and cpu date
    url_match = re.search(r'/cpu([a-z]{3})(\d{4})(?:verbose)?\.html(?:#.*)?$', url)
    if not url_match:
        raise AdvisoryParserTextException('Unexpected CPU URL format.')

    # Get base url and determine advisory_url and verbose_url
    url = url[0:url_match.start() + len('/cpuMMMYYYY')]
    advisory_url = url + '.html#AppendixMSQL'
    verbose_url = url + 'verbose.html'

    # Extract the CPU's month and year from the URL since the verbose page has
    # no dates on it
    month, year = url_match.groups()
    cpu_date = _nearest_tuesday(int(year), month)
    advisory_id = 'CPU {} {}'.format(month.capitalize(), year)

    # Fetch the CPU verbose page
    advisory_html = get_request(verbose_url)
    soup = bs4.BeautifulSoup(advisory_html, 'html.parser')

    mysql_table = soup.find(id='MSQL').find_next('table')

    # The first row is the table header so throw that one away
    table_rows = mysql_table.find_all('tr')[1:]

    mariadb_cve_map = create_mariadb_cve_map()

    flaws, warnings = [], []

    for row in table_rows:
        # First anchor id contains the CVE
        cve = row.find('a').get('id')

        # Second td contains a description
        description_cell = row.find_all('td')[1].contents

        # Join all contents of the cell into one string
        description = []
        for element in description_cell:
            if isinstance(element, bs4.element.NavigableString) and element.string:
                description.append(element.string)
            elif isinstance(element, bs4.element.Tag) and element.text:
                description.append(element.text)
        description = '\n'.join(description)

        # Take the text part only, i.e. anything before the CVSS string
        description, cvss_text = re.split(
            r'\n\s*CVSS v?3\.[0-9] (?=Base Score)', description)

        # Filter out some whitespace.  BUGFIX: the second replace previously
        # substituted a single space for a single space (a no-op); collapse
        # the double spaces produced by the newline join instead.
        description = description.replace('\n', ' ').replace('  ', ' ').strip()

        product = re.search(r'^Vulnerability in the (.+) (component|product) of ', description)
        if not product:
            warnings.append('ERROR: Could not identify product in {}; skipping:\n\n{}\n---'
                            .format(cve, description))
            continue

        if 'MySQL Server' not in product.group(1) and 'MySQL Client' not in product.group(1):
            warnings.append('ERROR: Skipping {}; does not affect MySQL Server or Client component'
                            .format(cve))
            continue

        # Filter out the lines that start with CVSS and find the score + vector
        match = re.search(r'Base Score\s*(\d?\d\.\d).*Vector:\s*\(([^\)]+)\)', cvss_text)
        if not match:
            # BUGFIX: impact was previously left unset on this branch, which
            # raised NameError on the first row (or silently reused the
            # previous row's impact).  Record it as unknown instead.
            cvss3 = None
            impact = None
            warnings.append('Could not parse CVSSv3 score from {} description'.format(cve))
        else:
            cvss3_score = match.group(1)
            cvss3 = cvss3_score + '/' + match.group(2)
            # Map the CVSSv3 base score onto the four-level impact scale.
            x = float(cvss3_score)
            if 0.0 < x < 4.0:
                impact = 'low'
            elif 4.0 <= x < 7.0:
                impact = 'moderate'
            elif 7.0 <= x < 9.0:
                impact = 'important'
            else:
                impact = 'critical'

        component = re.search(r'\((sub)?component: ([^\)]+\)?)\)', description).group(2)
        summary = ('mysql: {} unspecified vulnerability ({})'.format(component, advisory_id))

        # Flaw descriptions contain vulnerable versions. Fixed versions are usually
        # one version higher.
        vulnerable_versions = VERSION_REGEX.findall(description)
        mysql_fixed_in = []
        for version in vulnerable_versions:
            # Bump the last version component by one, e.g. 5.7.21 -> 5.7.22
            fixed_version = '{}.{}'.format(version.rsplit('.', 1)[0],
                                           int(version.split('.')[-1]) + 1)
            mysql_fixed_in.append(fixed_version)

        fixed_in = {'mysql': mysql_fixed_in}

        mariadb_fixed_in = mariadb_cve_map.get(cve)
        if mariadb_fixed_in:
            fixed_in['mariadb'] = mariadb_fixed_in

        flaws.append(Flaw(
            cves=[cve],
            summary=summary,
            public_date=cpu_date,
            cvss3=cvss3,
            impact=impact,
            description=description,
            fixed_in=fixed_in,
            from_url=advisory_url,
            advisory_id=advisory_id,
        ))

    return flaws, warnings
def parse_chrome_advisory(url):
    """Parse a Chrome Releases blog post into a list of Flaw objects.

    Returns a ``(flaws, warnings)`` tuple.

    Raises AdvisoryParserTextException when the page contains no security
    fixes, no parseable public date, no fixed-in version, or no CVEs.
    """
    advisory_text = get_text_from_url(url)

    # Workaround for advisories that do not use <div>s for each CVE entry. E.g.:
    # https://chromereleases.googleblog.com/2018/04/stable-channel-update-for-desktop.html
    advisory_text = re.sub(r'(.)\[\$', r'\1\n[$', advisory_text)

    if 'Security Fixes' not in advisory_text:
        raise AdvisoryParserTextException('No security fixes found in {}'.format(url))

    # Throw away parts of the text after the blog post
    flaws_text = advisory_text.split('Labels:\nStable updates')[0].strip()

    # Parse out public date
    match = re.search('^Stable Channel Update for Desktop\n(.+)', flaws_text, re.MULTILINE)
    if not match:
        raise AdvisoryParserTextException('Could not find public date in {}'.format(url))

    try:
        public_date = datetime.strptime(match.group(1), "%A, %B %d, %Y")
    except ValueError:
        raise AdvisoryParserTextException(
            'Could not parse public date ({}) from {}'.format(match.group(1), url))

    # Find Chrome version, e.g. 46.0.2490.71
    # BUGFIX: re.search returns None (it does not raise ValueError) when the
    # pattern is absent, so the previous try/except ValueError could never
    # fire and the code crashed with AttributeError instead.
    version_match = re.search(r'\d{2}\.\d\.\d{4}\.\d{2,3}', flaws_text)
    if version_match is None:
        raise AdvisoryParserTextException('Could not find fixed-in version in {}'.format(url))
    fixed_in = version_match.group(0)

    # Filter out lines that contain CVEs
    cve_lines = [line.strip() for line in flaws_text.split('\n') if CVE_REGEX.search(line)]
    if not cve_lines:
        raise AdvisoryParserTextException('Could not find any CVEs in {}'.format(url))

    flaws, warnings = [], []
    for line in cve_lines:
        # Parse each line containing information about a CVE, e.g.:
        # [$7500][590275] High CVE-2016-1652: XSS in X. Credit to anonymous.

        # First, split into two groups by first encountered colon.
        # BUGFIX: split only at the FIRST colon; the flaw text can itself
        # contain colons, which made a plain split(':') raise ValueError on
        # the 2-tuple unpacking.  partition also tolerates lines with no
        # colon at all, routing them to the warning below.
        metadata, _, text = line.partition(':')
        if not metadata or not text:
            warnings.append('Could not parse line: {}'.format(line))
            continue

        # If a line contains Various, it describes internal fixes, e.g.:
        # [563930] CVE-2015-6787: Various fixes from internal audits...
        if 'Various' in text:
            impact = 'important'
        else:
            match = re.search(r'(Critical|High|Medium|Low)', metadata)
            if not match:
                print('Could not find impact; skipping: {}'.format(line))
                continue
            # Map Chrome's severity scale onto the internal impact scale.
            impact = match.group(1).lower()
            impact = impact.replace('high', 'important')
            impact = impact.replace('medium', 'moderate')

        bug_ids = re.findall(r'\d{6,}', metadata)
        cves = CVE_REGEX.findall(metadata)
        if not bug_ids and not cves:
            warnings.append('Could not find CVEs or bugs; skipping: {}'.format(line))
            continue

        summary = text.split('.')[0].strip()
        if ' in ' in summary:
            issue, component = summary.split(' in ', 1)
            article = 'An' if issue.lower()[0] in 'aeiou' else 'A'
            description = ('{} {} flaw was found in the {} component of the Chromium browser.'
                           .format(article, issue.lower(), component))
        elif 'various fixes' in summary.lower():
            description = summary + '.'
            summary = 'various fixes from internal audits'
        else:
            description = ('The following flaw was identified in the Chromium browser: {}.'
                           .format(summary))

        summary = 'chromium-browser: ' + summary

        description += '\n\nUpstream bug(s):\n'
        for bug in bug_ids:
            description += '\nhttps://code.google.com/p/chromium/issues/detail?id=' + bug

        # Normalize localized blogspot domains back to blogspot.com.
        com_url = url if 'blogspot.com' in url else re.sub(
            r'blogspot\.[^/]*/', 'blogspot.com/', url)
        cvss3 = CVSS3_MAP[impact]

        flaws.append(Flaw(
            from_url=com_url,
            cves=cves,
            summary=summary,
            public_date=public_date,
            cvss3=cvss3,
            impact=impact,
            fixed_in={'chromium-browser': [fixed_in]},
            description=description,
        ))

    return flaws, warnings
def parse_flash_advisory(url):
    """Parse an Adobe Flash security bulletin into a list of Flaw objects.

    Fetches the bulletin at *url*, reads the advisory ID and publication
    date, the Linux fixed-in version, and the vulnerability details matrix,
    then emits one Flaw per vulnerability-impact group.

    Returns a ``(flaws, warnings)`` tuple (warnings is always empty here;
    it is kept for interface symmetry with the other parsers).
    """
    page = BeautifulSoup(get_request(url), "html.parser")

    # The first table after the page description carries the advisory ID and
    # date; row 0 is the header, row 1 holds the values.
    header_table = page.find("div", {"class": "page-description"}).find_next("table")
    header_cells = header_table.find_all("tr")[1].find_all("td")
    advisory_id, public_date, _ = [cell.get_text().strip() for cell in header_cells]

    try:
        public_date = datetime.strptime(public_date, "%B %d, %Y")
    except ValueError:
        raise AdvisoryParserTextException(
            "Could not parse public date ({}) from {}".format(public_date, url))

    # The Solution table lists fixed versions per platform; keep Linux only.
    fixed_in = []
    solution_rows = page.find(id="solution").find_next("table").find_all("tr")
    for solution_row in solution_rows:
        _, version, platform, _, _ = [cell.get_text() for cell in solution_row.find_all("td")]
        if platform == "Linux":
            fixed_in.append(version)
            break

    # Build (impact, category, severity, CVE) tuples from the vulnerability
    # details table, skipping the header row and normalizing &nbsp; to space.
    matrix = []
    detail_rows = page.find(id="Vulnerabilitydetails").find_next("table").find_all("tr")
    for detail_row in detail_rows[1:]:
        category, impact, severity, cve = [
            cell.get_text().strip().replace(u"\xa0", u" ")
            for cell in detail_row.find_all("td")
        ]
        matrix.append((impact, category, severity, cve))

    flaws, warnings = [], []
    # sorted() orders the tuples by impact first, which is exactly what
    # groupby needs to group by element 0.
    for vuln_impact, impact_group in groupby(sorted(matrix), lambda entry: entry[0]):
        entries = list(impact_group)  # groupby yields a one-shot iterator

        # SEVERITY_ORDER.index ranks severities; take the highest-ranked one.
        highest_severity = min((entry[2].lower() for entry in entries),
                               key=SEVERITY_ORDER.index)
        all_cves = [entry[3] for entry in entries]

        # One "category -- CVE, CVE, ..." line per vulnerability category.
        flaw_summaries = []
        for category, category_group in groupby(sorted(entries), lambda entry: entry[1]):
            category_cves = [entry[3] for entry in category_group]
            flaw_summaries.append("{} -- {}".format(category, ", ".join(category_cves)))

        description = DESCRIPTION_TEMPLATE.format(
            advisory_id=advisory_id,
            vuln_impact=vuln_impact.lower(),
            number_of_flaws="multiple flaws" if len(flaw_summaries) > 1 else "a flaw",
            flaw_summaries="\n".join(flaw_summaries),
        )
        summary = "flash-plugin: {} {} ({})".format(
            vuln_impact,
            "vulnerability" if len(all_cves) == 1 else "vulnerabilities",
            advisory_id,
        )

        flaws.append(Flaw(
            from_url=url,
            public_date=public_date,
            cves=list(all_cves),
            fixed_in={"flash-plugin": fixed_in},
            summary=summary,
            impact=highest_severity,
            description=description,
            # Default to a generic high-impact vector when the impact keyword
            # is missing from CVSS_MAP.
            cvss3=CVSS_MAP.get(vuln_impact.lower(),
                               "8.8/CVSS:3.0/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H"),
            advisory_id=advisory_id,
        ))

    return flaws, warnings
def parse_flash_advisory(url):
    """Parse an Adobe Flash security bulletin into a list of Flaw objects.

    Fetches the bulletin at *url*, extracts the advisory ID and publication
    date, the Linux fixed-in version, and the vulnerability details matrix,
    then groups the CVEs by vulnerability impact into one Flaw per group.

    Returns a ``(flaws, warnings)`` tuple; ``warnings`` is never populated
    here and is returned only for interface symmetry with the other parsers.

    Raises AdvisoryParserTextException if the public date cannot be parsed.
    """
    advisory_html = get_request(url)
    soup = BeautifulSoup(advisory_html, 'html.parser')

    # Get the advisory ID and public date from the first table
    details_table = soup.find('div', {'class': 'page-description'}).find_next('table')
    table_rows = details_table.find_all('tr')

    # The first row is the header, the second contains the data we need
    advisory_id, public_date, _ = [elem.get_text() for elem in table_rows[1].find_all('td')]

    try:
        public_date = datetime.strptime(public_date, '%B %d, %Y')
    except ValueError:
        raise AdvisoryParserTextException(
            'Could not parse public date ({}) from {}'.format(public_date, url)
        )

    # Get the fixed_in version from the Solution table
    solution_table = soup.find(id='solution').find_next('table')
    table_rows = solution_table.find_all('tr')

    # Only the Linux row matters; stop at the first match.
    fixed_in = []
    for row in table_rows:
        _, version, platform, _, _ = [elem.get_text() for elem in row.find_all('td')]
        if platform == 'Linux':
            fixed_in.append(version)
            break

    # Get CVE information from the Vulnerability details table
    vulns_table = soup.find(id='Vulnerabilitydetails').find_next('table')
    table_rows = vulns_table.find_all('tr')

    # Loop over every row (excluding the header) and extract flaw data; group by vuln impact
    vuln_data = []
    for row in table_rows[1:]:
        # \xa0 is a non-breaking space; normalize it to a plain space.
        vuln_category, vuln_impact, severity, cve = [elem.get_text().strip().replace(u'\xa0', u' ')
                                                     for elem in row.find_all('td')]
        # Tuple order puts impact first so sorted() below orders by impact.
        vuln_data.append((vuln_impact, vuln_category, severity, cve))

    flaws, warnings = [], []
    # groupby requires input sorted by the grouping key (element 0 = impact).
    for vuln_impact, group_1 in groupby(sorted(vuln_data), lambda x: x[0]):
        data = list(group_1)  # Need a copy of the generator to loop over multiple times

        # SEVERITY_ORDER.index ranks severities; index 0 after sorting is the
        # highest severity in this impact group.
        highest_severity = sorted([entry[2].lower() for entry in data],
                                  key=SEVERITY_ORDER.index)[0]
        all_cves = [entry[3] for entry in data]

        # One "category -- CVE, CVE, ..." line per vulnerability category.
        flaw_summaries = []
        for vuln_category, group_2 in groupby(sorted(data), lambda x: x[1]):
            cves = [entry[3] for entry in group_2]
            flaw_summaries.append('{} -- {}'.format(vuln_category, ', '.join(cves)))

        description = DESCRIPTION_TEMPLATE.format(
            advisory_id=advisory_id,
            vuln_impact=vuln_impact.lower(),
            number_of_flaws='multiple flaws' if len(flaw_summaries) > 1 else 'a flaw',
            flaw_summaries='\n'.join(flaw_summaries),
        )
        summary = ('flash-plugin: {} {} ({})'
                   .format(vuln_impact,
                           'vulnerability' if len(all_cves) == 1 else 'vulnerabilities',
                           advisory_id))

        flaws.append(Flaw(
            from_url=url,
            public_date=public_date,
            cves=list(all_cves),
            fixed_in={'flash-plugin': fixed_in},
            summary=summary,
            impact=highest_severity,
            description=description,
            # Fall back to a generic high-impact CVSSv3 vector when the
            # impact keyword is not present in CVSS_MAP.
            cvss3=CVSS_MAP.get(vuln_impact.lower(),
                               '8.8/CVSS:3.0/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H'),
            advisory_id=advisory_id,
        ))

    return flaws, warnings
def parse_mysql_advisory(url):
    """Parse an Oracle CPU verbose risk-matrix page into MySQL flaws.

    Requires a "verbose" CPU URL (the text form of the risk matrices),
    since only that page carries the flaw descriptions parsed below.

    Returns a ``(flaws, warnings)`` tuple: a list of Flaw objects and a
    list of human-readable messages about rows that could not be parsed.

    Raises AdvisoryParserTextException for non-verbose URLs or URLs whose
    date cannot be extracted.
    """
    if 'verbose' not in url:
        raise AdvisoryParserTextException(
            'Please provide a verbose URL, e.g.: '
            'http://www.oracle.com/technetwork/security-advisory/cpuoct2016verbose-2881725.html'
        )

    advisory_html = get_request(url)
    soup = bs4.BeautifulSoup(advisory_html, 'html.parser')

    mysql_table = soup.find(id='MSQL').find_next('table')

    # The first row is the table header so throw that one away
    table_rows = mysql_table.find_all('tr')[1:]

    advisory_url = table_rows[0].find('a', text='Advisory')['href']

    # Extract the CPU's month and year from the URL since the page has no dates on it.
    date_match = re.search(r'/cpu([a-z]{3})(\d{4})verbose', url)
    if not date_match:
        raise AdvisoryParserTextException('Could not parse date from CPU URL.')

    month, year = date_match.groups()
    cpu_date = _nearest_tuesday(int(year), month)
    advisory_id = 'CPU {} {}'.format(month.capitalize(), year)

    mariadb_cve_map = create_mariadb_cve_map()

    flaws, warnings = [], []

    for row in table_rows:
        # First anchor hyperlink contains the CVE
        cve = row.find('a').string

        # Second td contains a description
        description_cell = row.find_all('td')[1].contents

        # Join all contents of the cell into one string
        description = []
        for element in description_cell:
            if isinstance(element, bs4.element.NavigableString) and element.string:
                description.append(element.string)
            elif isinstance(element, bs4.element.Tag) and element.text:
                description.append(element.text)
        description = '\n'.join(description)

        # Take the text part only, i.e. anything before the CVSS string.
        # BUGFIX: use a raw string so \s is a regex escape rather than an
        # invalid string escape (a warning on modern Python).
        description, cvss_text = re.split(r'\n\s*CVSS v3', description)

        # Filter out some whitespace.  BUGFIX: the second replace previously
        # substituted a single space for a single space (a no-op); collapse
        # the double spaces produced by the newline join instead.
        description = description.replace('\n', ' ').replace('  ', ' ').strip()

        product = re.search(
            r'^Vulnerability in the (.+) (component|product) of ', description)
        if not product:
            warnings.append(
                'ERROR: Could not identify product in {}; skipping:\n\n{}\n---'
                .format(cve, description))
            continue

        if 'MySQL Server' not in product.group(1):
            warnings.append(
                'ERROR: Skipping {}; does not affect MySQL Server component'.
                format(cve))
            continue

        # Filter out the lines that start with CVSS and find the score + vector
        match = re.search(r'Score\s*(\d?\d\.\d).*Vector:\s*\(([^\)]+)\)', cvss_text)
        if not match:
            # BUGFIX: impact was previously left unset on this branch, which
            # raised NameError on the first row (or silently reused the
            # previous row's impact).  Record it as unknown instead.
            cvss3 = None
            impact = None
            warnings.append(
                'Could not parse CVSSv3 score from {} description'.format(cve))
        else:
            cvss3_score = match.group(1)
            cvss3 = cvss3_score + '/' + match.group(2)
            # Map the CVSSv3 base score onto the four-level impact scale.
            x = float(cvss3_score)
            if 0.0 < x < 4.0:
                impact = 'low'
            elif 4.0 <= x < 7.0:
                impact = 'moderate'
            elif 7.0 <= x < 9.0:
                impact = 'important'
            else:
                impact = 'critical'

        component = re.search(r'\((sub)?component: ([^\)]+\)?)\)', description).group(2)
        summary = ('mysql: {} unspecified vulnerability ({})'.format(
            component, advisory_id))

        # Flaw descriptions contain vulnerable versions. Fixed versions are usually
        # one version higher.
        vulnerable_versions = VERSION_REGEX.findall(description)
        mysql_fixed_in = []
        for version in vulnerable_versions:
            # Bump the last version component by one, e.g. 5.7.21 -> 5.7.22
            fixed_version = '{}.{}'.format(
                version.rsplit('.', 1)[0], int(version.split('.')[-1]) + 1)
            mysql_fixed_in.append(fixed_version)

        fixed_in = {'mysql': mysql_fixed_in}

        mariadb_fixed_in = mariadb_cve_map.get(cve)
        if mariadb_fixed_in:
            fixed_in['mariadb'] = mariadb_fixed_in

        flaws.append(
            Flaw(
                cves=[cve],
                summary=summary,
                public_date=cpu_date,
                cvss3=cvss3,
                impact=impact,
                description=description,
                fixed_in=fixed_in,
                from_url=advisory_url,
                advisory_id=advisory_id,
            ))

    return flaws, warnings
def parse_chrome_advisory(url):
    """Parse a Chrome Releases blog post into a list of Flaw objects.

    Returns a ``(flaws, warnings)`` tuple.

    Raises AdvisoryParserTextException when the page contains no security
    fixes, no parseable public date, no fixed-in version, or no CVEs.
    """
    advisory_text = get_text_from_url(url)

    # Workaround for advisories that do not use <div>s for each CVE entry. E.g.:
    # https://chromereleases.googleblog.com/2018/04/stable-channel-update-for-desktop.html
    advisory_text = re.sub(r"(.)\[\$", r"\1\n[$", advisory_text)

    if "Security Fixes" not in advisory_text:
        raise AdvisoryParserTextException(
            "No security fixes found in {}".format(url))

    # Throw away parts of the text after the blog post
    flaws_text = advisory_text.split("Labels:\nStable updates")[0].strip()

    # Parse out public date
    match = re.search("^Stable Channel Update for Desktop\n(.+)", flaws_text,
                      re.MULTILINE)
    if not match:
        raise AdvisoryParserTextException(
            "Could not find public date in {}".format(url))

    try:
        public_date = datetime.strptime(match.group(1), "%A, %B %d, %Y")
    except ValueError:
        raise AdvisoryParserTextException(
            "Could not parse public date ({}) from {}".format(
                match.group(1), url))

    # Find Chrome version, e.g. 46.0.2490.71
    # BUGFIX: re.search returns None (it does not raise ValueError) when the
    # pattern is absent, so the previous try/except ValueError could never
    # fire and the code crashed with AttributeError instead.
    version_match = re.search(r"\d{2}\.\d\.\d{4}\.\d{2,3}", flaws_text)
    if version_match is None:
        raise AdvisoryParserTextException(
            "Could not find fixed-in version in {}".format(url))
    fixed_in = version_match.group(0)

    # Filter out lines that contain CVEs
    cve_lines = [
        line.strip() for line in flaws_text.split("\n")
        if CVE_REGEX.search(line)
    ]
    if not cve_lines:
        raise AdvisoryParserTextException(
            "Could not find any CVEs in {}".format(url))

    flaws, warnings = [], []
    for line in cve_lines:
        # Parse each line containing information about a CVE, e.g.:
        # [$7500][590275] High CVE-2016-1652: XSS in X. Credit to anonymous.

        # First, split into two groups by first encountered colon (the flaw
        # text may itself contain colons).
        metadata, text = line.split(":", maxsplit=1)
        if not metadata or not text:
            warnings.append("Could not parse line: {}".format(line))
            continue

        # If a line contains Various, it describes internal fixes, e.g.:
        # [563930] CVE-2015-6787: Various fixes from internal audits...
        if "Various" in text:
            impact = "important"
        else:
            match = re.search(r"(Critical|High|Medium|Low)", metadata)
            if not match:
                print("Could not find impact; skipping: {}".format(line))
                continue
            # Map Chrome's severity scale onto the internal impact scale.
            impact = match.group(1).lower()
            impact = impact.replace("high", "important")
            impact = impact.replace("medium", "moderate")

        bug_ids = re.findall(r"\d{6,}", metadata)
        cves = CVE_REGEX.findall(metadata)
        if not bug_ids and not cves:
            warnings.append(
                "Could not find CVEs or bugs; skipping: {}".format(line))
            continue

        summary = text.split(".")[0].strip()
        if " in " in summary:
            issue, component = summary.split(" in ", 1)
            article = "An" if issue.lower()[0] in "aeiou" else "A"
            description = (
                "{} {} flaw was found in the {} component of the Chromium browser."
                .format(article, issue.lower(), component))
        elif "various fixes" in summary.lower():
            description = summary + "."
            summary = "various fixes from internal audits"
        else:
            description = "The following flaw was identified in the Chromium browser: {}.".format(
                summary)

        summary = "chromium-browser: " + summary

        description += "\n\nUpstream bug(s):\n"
        for bug in bug_ids:
            description += "\nhttps://code.google.com/p/chromium/issues/detail?id=" + bug

        # Normalize localized blogspot domains back to blogspot.com.
        com_url = (url if "blogspot.com" in url else re.sub(
            r"blogspot\.[^/]*/", "blogspot.com/", url))
        cvss3 = CVSS3_MAP[impact]

        flaws.append(
            Flaw(
                from_url=com_url,
                cves=cves,
                summary=summary,
                public_date=public_date,
                cvss3=cvss3,
                impact=impact,
                fixed_in={"chromium-browser": [fixed_in]},
                description=description,
            ))

    return flaws, warnings