def set_api(self):
    """Create the version API for ``nginx/nginx`` and normalise its tags.

    A fresh ``GitHubTagsAPI`` is stored on ``self.version_api`` and loaded
    for the ``nginx/nginx`` repository. The cached tag list is then
    rewritten in place so that the ``release-`` marker is removed from
    every entry.
    """
    repo = "nginx/nginx"
    self.version_api = GitHubTagsAPI()
    asyncio.run(self.version_api.load_api([repo]))

    # nginx names its tags `release-1.2.3`; only the bare version part is
    # wanted in the cache, so the `release-` marker is dropped here.
    tags = self.version_api.cache[repo]
    tags[:] = [tag.replace("release-", "") for tag in tags]
def __enter__(self):
    """Prepare the data source for a run.

    Delegates to the parent ``__enter__``, computes the added/updated
    markdown files below ``./content/en/news/security`` unless
    ``_added_files`` is already populated, and finally installs and loads
    a new ``GitHubTagsAPI``.
    """
    super(IstioDataSource, self).__enter__()

    already_scanned = getattr(self, "_added_files", None)
    if not already_scanned:
        changes = self.file_changes(
            recursive=True, file_ext="md", subdir="./content/en/news/security"
        )
        self._added_files, self._updated_files = changes

    self.version_api = GitHubTagsAPI()
    self.set_api()
def setUpClass(cls):
    """Load the fixture text and build an nginx data source with stub tags.

    The contents of ``TEST_DATA`` are kept on ``cls.data``. The data
    source gets a ``GitHubTagsAPI`` whose cache is pre-filled with a fixed
    set of ``nginx/nginx`` versions.
    """
    with open(TEST_DATA) as fixture:
        cls.data = fixture.read()

    cls.data_src = NginxDataSource(1, config={"etags": {}})
    stub_cache = {"nginx/nginx": {"1.2.3", "1.7.0", "1.3.9", "0.7.52"}}
    cls.data_src.version_api = GitHubTagsAPI(cache=stub_cache)
def setUpClass(cls):
    """Build an Apache HTTPD data source with stub tags and load the fixture.

    The data source gets a ``GitHubTagsAPI`` whose cache is pre-filled
    with three ``apache/httpd`` versions. The JSON document in
    ``TEST_DATA`` is parsed and kept on ``cls.data``.
    """
    cls.data_src = ApacheHTTPDDataSource(1, config={"etags": {}})
    cls.data_src.version_api = GitHubTagsAPI(
        cache={"apache/httpd": ["1.3.2", "1.3.1", "1.3.0"]}
    )

    with open(TEST_DATA) as fixture:
        cls.data = json.load(fixture)
def test_to_advisory(self):
    """Check that ``to_advisory`` produces the expected Kafka advisory.

    The data source is given a ``GitHubTagsAPI`` with a fixed cache of two
    ``apache/kafka`` versions. The advisories parsed from ``TEST_DATA``
    are normalised and compared, order-insensitively, with the expected
    list.
    """
    data_source = ApacheKafkaDataSource(batch_size=1)
    data_source.version_api = GitHubTagsAPI(
        cache={"apache/kafka": ["2.1.2", "0.10.2.2"]})

    def kafka_purl(version):
        # Every expected package only differs by its version.
        return PackageURL(
            type="apache",
            namespace=None,
            name="kafka",
            version=version,
            qualifiers={},
            subpath=None,
        )

    expected_summary = (
        "In Apache Kafka versions between 0.11.0.0 and 2.1.0, it is possible to "
        "manually\n craft a Produce request which bypasses transaction/idempotent ACL "
        "validation.\n Only authenticated clients with Write permission on the "
        "respective topics are\n able to exploit this vulnerability. Users should "
        "upgrade to 2.1.1 or later\n where this vulnerability has been fixed."
    )
    expected_references = [
        Reference(url="https://kafka.apache.org/cve-list", reference_id=""),
        Reference(
            url="https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-17196",
            reference_id="CVE-2018-17196",
        ),
    ]
    expected_advisories = [
        Advisory(
            summary=expected_summary,
            impacted_package_urls=[kafka_purl("0.10.2.2")],
            resolved_package_urls=[kafka_purl("2.1.2")],
            references=expected_references,
            vulnerability_id="CVE-2018-17196",
        )
    ]

    with open(TEST_DATA) as page:
        found_advisories = data_source.to_advisory(page)

    found_advisories = [Advisory.normalized(adv) for adv in found_advisories]
    expected_advisories = [Advisory.normalized(adv) for adv in expected_advisories]
    assert sorted(found_advisories) == sorted(expected_advisories)
def setUpClass(cls):
    """Build an Istio data source whose version API only knows pre-releases.

    The ``GitHubTagsAPI`` is constructed with a fixed mapping for
    ``istio/istio`` holding release-candidate and snapshot tags.
    """
    prerelease_tags = [
        "1.1.0-rc.0",
        "1.1.0-rc.1",
        "1.1.0-rc.2",
        "1.1.0-rc.3",
        "1.1.0-rc.4",
        "1.1.0-rc.5",
        "1.1.0-rc.6",
        "1.1.0-snapshot.2",
        "1.1.0-snapshot.3",
    ]
    cls.data_src = IstioDataSource(1, config={"repository_url": ""})
    cls.data_src.version_api = GitHubTagsAPI({"istio/istio": prerelease_tags})
def setUpClass(cls):
    """Build an Istio data source whose version API knows a fixed tag list.

    The ``GitHubTagsAPI`` is constructed with a fixed mapping for
    ``istio/istio`` holding ten plain release versions.
    """
    release_tags = [
        "1.0.0",
        "1.1.0",
        "1.1.1",
        "1.1.17",
        "1.2.1",
        "1.2.7",
        "1.3.0",
        "1.3.1",
        "1.3.2",
        "1.9.1",
    ]
    cls.data_src = IstioDataSource(1, config={"repository_url": ""})
    cls.data_src.version_api = GitHubTagsAPI({"istio/istio": release_tags})
class IstioDataSource(GitDataSource):
    """Build advisories from the Istio security-bulletin markdown files.

    The bulletins are the ``.md`` files below ``./content/en/news/security``
    of the checked-out repository. Each file starts with a YAML block that
    carries the description, the affected releases and the CVE ids.
    """

    def __enter__(self):
        """Run the parent setup, detect changed files and load the tags."""
        super().__enter__()

        already_scanned = getattr(self, "_added_files", None)
        if not already_scanned:
            changes = self.file_changes(
                recursive=True, file_ext="md", subdir="./content/en/news/security"
            )
            self._added_files, self._updated_files = changes

        self.version_api = GitHubTagsAPI()
        self.set_api()

    def set_api(self):
        """Load the ``istio/istio`` tags into ``self.version_api``."""
        asyncio.run(self.version_api.load_api(["istio/istio"]))

    def updated_advisories(self) -> Set[Advisory]:
        """Return the batched advisories of every updated bulletin file."""
        collected = []
        for path in self._updated_files:
            parsed = self.process_file(path)
            if parsed:
                collected.extend(parsed)
        return self.batch_advisories(collected)

    def get_pkg_versions_from_ranges(self, version_range_list):
        """Split the known Istio versions into safe and vulnerable ones.

        ``version_range_list`` holds the affected version ranges. A known
        version is vulnerable when it lies in at least one of them.

        Returns a ``(safe, vulnerable)`` pair: ``safe`` is a set,
        ``vulnerable`` a list in the order of the known versions.
        """
        known_versions = self.version_api.get("istio/istio")
        specifiers = [RangeSpecifier(expr) for expr in version_range_list]

        vulnerable = [
            candidate
            for candidate in known_versions
            if any([candidate in spec for spec in specifiers])
        ]
        safe = set(known_versions) - set(vulnerable)
        return safe, vulnerable

    def get_data_from_yaml_lines(self, yaml_lines):
        """Parse an iterable of YAML lines into a mapping.

        The lines are joined with newlines and handed to ``yaml.safe_load``.
        For example ::

            ['title: ISTIO-SECURITY-2019-001',
             'description: Incorrect access control.',
             'cves: [CVE-2019-12243]']

        is loaded as a mapping with the keys ``title``, ``description``
        and ``cves``.
        """
        document = "\n".join(yaml_lines)
        return yaml.safe_load(document)

    def get_yaml_lines(self, lines):
        """Yield the stripped lines of the leading YAML block of a bulletin.

        A bulletin begins like this ::

            ---
            title: ISTIO-SECURITY-2019-001
            description: Incorrect access control.
            cves: [CVE-2019-12243]
            ---

        The ``---`` on the very first line is skipped. Iteration stops at
        the first later line that ends with ``---``, so the example yields
        the three lines between the markers.
        """
        for position, raw_line in enumerate(lines):
            text = raw_line.strip()
            if position == 0 and text.startswith("---"):
                continue
            if text.endswith("---"):
                return
            yield text

    def process_file(self, path):
        """Turn one bulletin file into a list of advisories, one per CVE.

        The ``releases`` entries are translated to range expressions and
        stored under ``release_ranges``. A bulletin without ``cves`` still
        produces one advisory, with an empty vulnerability id.
        """
        advisories = []
        data = self.get_data_from_md(path)

        release_ranges = []
        for release in data.get("releases") or []:
            if "All releases" in release:
                # Form: "All releases prior to x"
                words = release.strip().split(" ")
                release_ranges.append("<" + words[4])
            elif "to" in release:
                # Form: "a to b"
                words = release.strip().split(" ")
                lower = ">=" + words[0]
                upper = "<=" + words[2]
                release_ranges.append(lower + "," + upper)
            elif is_release(release):
                # A single, plain release number
                release_ranges.append(release)
        data["release_ranges"] = release_ranges

        if not data.get("cves"):
            data["cves"] = [""]

        for cve in data["cves"]:
            # Anything that does not look like a CVE id is blanked out.
            vulnerability_id = cve if cve.startswith("CVE") else ""

            if not data.get("release_ranges"):
                data["release_ranges"] = []

            safe_versions, vuln_versions = self.get_pkg_versions_from_ranges(
                data["release_ranges"]
            )

            # Every version is reported under both the golang and the
            # github package type.
            safe_golang = {
                PackageURL(type="golang", name="istio", version=v) for v in safe_versions
            }
            safe_github = {
                PackageURL(type="github", name="istio", version=v) for v in safe_versions
            }
            vuln_golang = {
                PackageURL(type="golang", name="istio", version=v) for v in vuln_versions
            }
            vuln_github = {
                PackageURL(type="github", name="istio", version=v) for v in vuln_versions
            }

            advisories.append(
                Advisory(
                    summary=data["description"],
                    impacted_package_urls=vuln_github | vuln_golang,
                    resolved_package_urls=safe_github | safe_golang,
                    vulnerability_id=vulnerability_id,
                )
            )

        return advisories

    def get_data_from_md(self, path):
        """Return the mapping parsed from the YAML block of the file at ``path``."""
        with open(path) as md_file:
            # get_yaml_lines is lazy, so parsing has to happen while the
            # file is still open.
            return self.get_data_from_yaml_lines(self.get_yaml_lines(md_file))


# Matches a string made up only of digits and dots, e.g. "1.2.3".
is_release = re.compile(r"^[\d.]+$", re.IGNORECASE).match
def set_api(self):
    """Install a new ``GitHubTagsAPI`` and load the ``apache/kafka`` tags."""
    repositories = ["apache/kafka"]
    self.version_api = GitHubTagsAPI()
    asyncio.run(self.version_api.load_api(repositories))
class ApacheKafkaDataSource(DataSource):
    """Build advisories from the Apache Kafka CVE list page."""

    @staticmethod
    def fetch_advisory_page():
        """Download ``GH_PAGE_URL`` and return the raw response body."""
        return requests.get(GH_PAGE_URL).content

    def set_api(self):
        """Install a new ``GitHubTagsAPI`` and load the ``apache/kafka`` tags."""
        repositories = ["apache/kafka"]
        self.version_api = GitHubTagsAPI()
        asyncio.run(self.version_api.load_api(repositories))

    def updated_advisories(self):
        """Fetch the page, load the tags and return the batched advisories."""
        page = self.fetch_advisory_page()
        self.set_api()
        return self.batch_advisories(self.to_advisory(page))

    def to_advisory(self, advisory_page):
        """Parse the CVE list markup into a list of ``Advisory`` objects.

        Every ``h2`` element starts one CVE section. The id is the first
        line of the heading text, the summary is the next sibling ``p``,
        and the next sibling ``table`` carries the affected versions in
        its first row and the fixed versions in its second row (second
        cell of each row).
        """

        def packages_matching(version_ranges):
            # Known Kafka versions that fall into at least one given range.
            return [
                PackageURL(type="apache", name="kafka", version=version)
                for version in self.version_api.get("apache/kafka")
                if any([version in version_range for version_range in version_ranges])
            ]

        soup = BeautifulSoup(advisory_page, features="lxml")
        advisories = []

        for heading in soup.find_all("h2"):
            cve_id = heading.text.split("\n")[0]
            description = heading.find_next_sibling("p")
            rows = heading.find_next_sibling("table").find_all("tr")
            affected_row, fixed_row = rows[0], rows[1]

            affected_ranges = to_version_ranges(affected_row.find_all("td")[1].text)
            fixed_ranges = to_version_ranges(fixed_row.find_all("td")[1].text)

            fixed_packages = packages_matching(fixed_ranges)
            affected_packages = packages_matching(affected_ranges)

            mitre_url = f"https://cve.mitre.org/cgi-bin/cvename.cgi?name={cve_id}"
            advisories.append(
                Advisory(
                    vulnerability_id=cve_id,
                    summary=description.text,
                    impacted_package_urls=affected_packages,
                    resolved_package_urls=fixed_packages,
                    vuln_references=[
                        Reference(url=ASF_PAGE_URL),
                        Reference(url=mitre_url, reference_id=cve_id),
                    ],
                )
            )

        return advisories
class ApacheHTTPDDataSource(DataSource):
    """Build advisories from the Apache HTTPD security JSON documents."""

    CONFIG_CLASS = ApacheHTTPDDataSourceConfiguration
    base_url = "https://httpd.apache.org/security/json/"

    def set_api(self):
        """Install a new ``GitHubTagsAPI`` and load the ``apache/httpd`` tags."""
        repositories = ["apache/httpd"]
        self.version_api = GitHubTagsAPI()
        asyncio.run(self.version_api.load_api(repositories))

    def updated_advisories(self):
        """Download every linked JSON document and return batched advisories."""
        links = fetch_links(self.base_url)
        self.set_api()
        advisories = [self.to_advisory(requests.get(link).json()) for link in links]
        return self.batch_advisories(advisories)

    def to_advisory(self, data):
        """Convert one parsed JSON document into an ``Advisory``.

        The summary is the first description whose ``lang`` is ``eng``
        (``None`` when there is none). The first truthy ``other`` value
        among the ``impact`` entries becomes the severity attached to the
        reference.
        """
        cve = data["CVE_data_meta"]["ID"]

        description = next(
            (
                entry.get("value")
                for entry in data["description"]["description_data"]
                if entry["lang"] == "eng"
            ),
            None,
        )

        severities = []
        for impact in data.get("impact", []):
            rating = impact.get("other")
            if rating:
                severities.append(
                    VulnerabilitySeverity(
                        system=scoring_systems["apache_httpd"],
                        value=rating,
                    )
                )
                # Only the first rating is kept.
                break

        reference = Reference(
            reference_id=cve,
            url=urllib.parse.urljoin(self.base_url, f"{cve}.json"),
            severities=severities,
        )

        # Flatten vendor -> product -> version into one list.
        versions_data = [
            version_data
            for vendor in data["affects"]["vendor"]["vendor_data"]
            for product in vendor["product"]["product_data"]
            for version_data in product["version"]["version_data"]
        ]
        fixed_version_ranges, affected_version_ranges = self.to_version_ranges(versions_data)

        fixed_packages = [
            PackageURL(type="apache", name="httpd", version=version)
            for version_range in fixed_version_ranges
            for version in self.version_api.get("apache/httpd")
            if MavenVersion(version) in version_range
        ]
        affected_packages = [
            PackageURL(type="apache", name="httpd", version=version)
            for version_range in affected_version_ranges
            for version in self.version_api.get("apache/httpd")
            if MavenVersion(version) in version_range
        ]

        return Advisory(
            vulnerability_id=cve,
            summary=description,
            affected_packages=nearest_patched_package(affected_packages, fixed_packages),
            references=[reference],
        )

    def to_version_ranges(self, versions_data):
        """Return ``(fixed_version_ranges, affected_version_ranges)``.

        An entry whose ``version_affected`` is ``<`` contributes a fixed
        range ``>=version_value``. An entry whose ``version_affected`` is
        ``=`` or ``?=`` contributes an affected range consisting of that
        version. Other entries are ignored.
        """
        make_range = VersionSpecifier.from_scheme_version_spec_string
        fixed_version_ranges = []
        affected_version_ranges = []

        for entry in versions_data:
            value = entry["version_value"]
            operator = entry["version_affected"]

            if operator == "<":
                fixed_version_ranges.append(make_range("maven", ">={}".format(value)))
            elif operator in ("=", "?="):
                affected_version_ranges.append(make_range("maven", "{}".format(value)))

        return (fixed_version_ranges, affected_version_ranges)
class NginxDataSource(DataSource):
    """Build advisories from the nginx security advisories page."""

    CONFIG_CLASS = NginxDataSourceConfiguration
    url = "http://nginx.org/en/security_advisories.html"

    def set_api(self):
        """Create the version API for ``nginx/nginx`` and normalise its tags."""
        self.version_api = GitHubTagsAPI()
        asyncio.run(self.version_api.load_api(["nginx/nginx"]))

        # nginx names its tags `release-1.2.3`; only the bare version part
        # is wanted in the cache, so the `release-` marker is dropped here.
        tags = self.version_api.cache["nginx/nginx"]
        tags[:] = [tag.replace("release-", "") for tag in tags]

    def updated_advisories(self):
        """Load the tags, download the page and return batched advisories."""
        self.set_api()
        data = requests.get(self.url).content
        return self.batch_advisories(self.to_advisories(data))

    def to_advisories(self, data):
        """Parse the advisories page markup into a list of ``Advisory`` objects.

        Each ``li p`` element describes one vulnerability. Its children
        look like this::

            ['Excessive CPU usage in HTTP/2 with small window updates',
             <br/>,
             'Severity: medium',
             <br/>,
             <a href="http://mailman.nginx.org/...">Advisory</a>,
             <br/>,
             <a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-9511">CVE-2019-9511</a>,
             <br/>,
             'Not vulnerable: 1.17.3+, 1.16.1+',
             <br/>,
             'Vulnerable: 1.9.5-1.17.2']
        """
        advisories = []
        soup = BeautifulSoup(data, features="lxml")

        for vuln_info in soup.select("li p"):
            # Reset everything per entry. Otherwise an entry that lacks a
            # CVE link or a version line would silently inherit the values
            # of the previous entry (or hit an unbound local on the first).
            # NOTE(review): None is assumed to be an acceptable
            # vulnerability_id for an entry without a CVE link -- confirm
            # against the Advisory definition.
            summary = ""
            cve_id = None
            fixed_packages = []
            vulnerable_packages = []
            # NOTE(review): the links are collected here but never handed
            # to Advisory below; confirm whether they should be passed on
            # as references.
            references = []

            for index, child in enumerate(vuln_info.children):
                if index == 0:
                    # The first child is a bs4 NavigableString; store it
                    # as a plain str.
                    summary = str(child)
                    continue

                # NavigableString has no `attrs`, only tags do.
                if hasattr(child, "attrs") and child.attrs.get("href"):
                    link = child.attrs["href"]
                    references.append(Reference(url=link))
                    if "cve.mitre.org" in link:
                        cve_id = child.text
                    continue

                if "Not vulnerable" in child:
                    fixed_packages = self.extract_fixed_pkgs(child)
                    continue

                if "Vulnerable" in child:
                    vulnerable_packages = self.extract_vuln_pkgs(child)
                    continue

            advisories.append(
                Advisory(
                    vulnerability_id=cve_id,
                    summary=summary,
                    affected_packages=nearest_patched_package(vulnerable_packages, fixed_packages),
                )
            )

        return advisories

    def extract_fixed_pkgs(self, vuln_info):
        """Return the package URLs named by a ``Not vulnerable: ...`` line.

        ``vuln_info`` has the form ``"Not vulnerable: 1.17.3+, 1.16.1+"``.
        An empty dict is returned when the version part contains ``none``
        (kept as a dict for backward compatibility); otherwise a list of
        ``PackageURL`` is returned.

        Raises ``ValueError`` when the line does not consist of exactly
        two parts separated by ``": "``.
        """
        _, version_info = vuln_info.split(": ")
        if "none" in version_info:
            return {}

        version_ranges = []
        for raw_range in version_info.split(","):
            # Splitting on "," leaves the blank that follows each comma, so
            # strip before building the spec. The trailing "+" is removed
            # explicitly, so a fragment without it does not lose a digit.
            base_version = raw_range.strip().rstrip("+")

            # Eg. "1.7.3+" gets converted to the semver spec "^1.7.3".
            # The advisory uses `+` in the sense that any version with a
            # greater or equal `minor` version satisfies the range:
            # "1.7.4" satisfies "1.7.3+", but "1.8.4" does not.
            # NOTE(review): in npm-style semver `^1.7.3` also admits
            # 1.8.x, while `~1.7.3` is the patch-level range -- confirm
            # which semantics the spec parser applies to `^`.
            version_ranges.append(
                VersionSpecifier.from_scheme_version_spec_string("semver", "^" + base_version)
            )

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)
        return [
            PackageURL(type="generic", name="nginx", version=version) for version in valid_versions
        ]

    def extract_vuln_pkgs(self, vuln_info):
        """Return the package URLs named by a ``Vulnerable: ...`` line.

        ``vuln_info`` has the form ``"Vulnerable: 1.9.5-1.17.2"``. The
        version part is a ``", "``-separated list of ``all``, discrete
        versions and ``lower-upper`` ranges. A range may be prefixed with
        ``nginx/Windows``; the resulting packages then carry the qualifier
        ``os=windows``.

        An empty dict is returned when the version part contains ``none``
        (kept as a dict for backward compatibility); otherwise a list of
        ``PackageURL`` is returned.

        Raises ``ValueError`` when the line does not consist of exactly
        two parts separated by ``": "``.
        """
        _, version_infos = vuln_info.split(": ")
        if "none" in version_infos:
            return {}

        version_ranges = []
        windows_only = False
        for version_info in version_infos.split(", "):
            if version_info == "all":
                # This is misleading since eventually some version gets fixed.
                continue

            if "-" not in version_info:
                # A discrete version: use the whole version string as the
                # spec. (Indexing with [0] would only pass its first
                # character, e.g. "1" for "1.5.10".)
                version_ranges.append(
                    VersionSpecifier.from_scheme_version_spec_string(
                        "semver", version_info.strip()
                    )
                )
                continue

            # As before, the flag reflects the last range that was seen.
            windows_only = "nginx/Windows" in version_info
            # Removing the marker leaves the blank that followed it, so
            # strip the bounds before they go into the spec.
            bounds = version_info.replace("nginx/Windows", "").strip()
            lower_bound, upper_bound = (bound.strip() for bound in bounds.split("-"))
            version_ranges.append(
                VersionSpecifier.from_scheme_version_spec_string(
                    "semver", f">={lower_bound},<={upper_bound}"
                )
            )

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)

        qualifiers = {}
        if windows_only:
            qualifiers["os"] = "windows"

        return [
            PackageURL(type="generic", name="nginx", version=version, qualifiers=qualifiers)
            for version in valid_versions
        ]