def test_write_read_records(self, get_tmpdir):
    tmpdir = get_tmpdir
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    r.initialize()
    ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
    r.metadata.download_result = DownloadOperationResult(
        started=ts, status=FeedDownloader.State.in_progress.value, results=[]
    )
    r.write_data(
        "feed1",
        "group1",
        chunk_id=0,
        data=b'{"next_token": "something", "data": [{"somekey": "somevalue"}]}',
    )

    with timer("Read single record group", log_level="info"):
        found_count = 0
        for i in r.read("feed1", "group1", start_index=0):
            logger.info("Got record {}".format(i))
            found_count += 1

    logger.info("Repo metadata: {}".format(r.metadata))
    assert found_count > 0
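# Every snippet in this section wraps work in a `timer(...)` context manager and logs through a
# module-level `logger`; neither is defined here. Below is a minimal sketch of what such a helper
# could look like, assuming it only logs elapsed wall-clock time at the requested level. The name,
# signature, and message format mirror their usage in this section but are otherwise assumptions,
# not the actual implementation.
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)


@contextmanager
def timer(label, log_level="info"):
    # Time the wrapped block and emit a single log record at the requested level,
    # falling back to info if an unknown level name is passed.
    start = time.monotonic()
    try:
        yield
    finally:
        elapsed = time.monotonic() - start
        log_fn = getattr(logger, log_level, logger.info)
        log_fn("{} took {:.3f} seconds".format(label, elapsed))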
def _scan_distro_packages_by_cpe(self, image: Image) -> List[CpeMatch]:
    # Match the distro packages
    with timer("os cpe matcher", log_level="debug"):
        db = get_thread_scoped_session()
        db.refresh(image)
        os_matches = self._match_distro_packages_by_cpe(image)
    return os_matches
def _scan_image_cpes(
    self,
    image: Image,
) -> List[Tuple]:
    """
    Similar to the vulnerabilities function, but using the cpe matches instead, basically the NVD raw data source

    :return: list of (image_cpe, cpe_vulnerability) tuples
    """
    with timer("non-os cpe matcher", log_level="debug"):
        return self.db_manager.query_image_application_vulnerabilities(
            self.cpe_cls, image
        )
def get_cpe_vulnerabilities(
    self,
    image: Image,
    nvd_cls: type = NvdV2Metadata,
    cpe_cls: type = CpeV2Vulnerability,
):
    if nvd_distro_matching_enabled and is_fix_only_distro(image.distro_name):
        matcher = DistroEnabledCpeMatcher(nvd_cls, cpe_cls)
    else:
        matcher = NonOSCpeMatcher(nvd_cls, cpe_cls)

    with timer("Image vulnerability cpe lookups", log_level="debug"):
        matches = matcher.image_cpe_vulnerabilities(image)

    return matches
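# Both matcher classes above are invoked through image_cpe_vulnerabilities(), whose body is not
# shown in this section. A minimal sketch of how the distro-enabled variant could compose the two
# scans defined above is below; the combination logic (simple concatenation, non-OS matches first)
# is an assumption for illustration, not confirmed by this code.
def image_cpe_vulnerabilities(self, image: Image) -> List[Tuple]:
    # Hypothetical composition: application (non-OS) CPE matches plus the
    # distro package matches produced by _scan_distro_packages_by_cpe().
    matches = list(self._scan_image_cpes(image))
    matches.extend(self._scan_distro_packages_by_cpe(image))
    return matches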
def test_LocalFeedDataRepo():
    tmpdir = tempfile.mkdtemp(prefix="anchoretest_repo-")
    r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=tmpdir))
    try:
        assert os.listdir(tmpdir) == []

        r.initialize()
        assert os.listdir(tmpdir) == ["metadata.json"]

        ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
        r.metadata.download_result = DownloadOperationResult(
            started=ts, status=FeedDownloader.State.in_progress.value, results=[]
        )
        r.flush_metadata()
        r.metadata = None
        r.reload_metadata()
        assert r.metadata.download_result.started == ts
        assert (
            r.metadata.download_result.status == FeedDownloader.State.in_progress.value
        )

        r.write_data(
            "feed1",
            "group1",
            chunk_id=0,
            data=b'{"next_token": "something", "data": [{"somekey": "somevalue"}]}',
        )

        with timer("Read single record group", log_level="info"):
            found_count = 0
            for i in r.read("feed1", "group1", start_index=0):
                logger.info("Got record {}".format(i))
                found_count += 1

        logger.info("Repo metadata: {}".format(r.metadata))
        assert found_count > 0
    finally:
        logger.info("Done with repo test")
        r.teardown()
def execute(self):
    with timer(
        "image vulnerabilities refresh for %s/%s"
        % (self.message.account_id, self.message.image_id),
        log_level="info",
    ):
        logger.debug(
            "Refreshing image vulnerabilities report for account_id=%s, image_id=%s, image_digest=%s",
            self.message.account_id,
            self.message.image_id,
            self.message.image_digest,
        )

        with session_scope() as session:
            # lookup image first
            img = (
                session.query(Image)
                .filter(
                    Image.user_id == self.message.account_id,
                    Image.id == self.message.image_id,
                )
                .one_or_none()
            )

            if not img:
                logger.debug(
                    "No record found for image account=%s, image_id=%s, skipping refresh",
                    self.message.account_id,
                    self.message.image_id,
                )
                return

            # call the provider with vendor_only and force disabled
            get_vulnerabilities_provider().get_image_vulnerabilities_json(
                image=img,
                vendor_only=False,
                db_session=session,
                force_refresh=False,
                use_store=True,
            )
def execute(self, feed_name=None, group_name=None) -> LocalFeedDataRepo:
    """
    Uses the parent method to get the full set of data and spool it to disk,
    then feeds it to the caller one page at a time.

    :param feed_name: optional feed name to restrict the download to
    :param group_name: optional group name to restrict the download to
    :return: the LocalFeedDataRepo containing the downloaded data
    """
    try:
        self.local_repo.initialize()
        self.local_repo.metadata.download_result = _download_start_metadata()
        self.local_repo.flush_metadata()
    except Exception:
        logger.debug_exception(
            "Could not initialize the feed data download location: {}. Failing fetch attempt".format(
                self.local_repo.root_dir
            )
        )
        raise

    groups_failed = 0
    try:
        for group in self.config.groups:
            if (feed_name and group.feed != feed_name) or (
                group_name and group_name != group.group
            ):
                # Skip groups that don't match if a specific group was requested
                logger.debug(
                    "Download configuration has record for group {}/{} but only {}/{} requested, so skipping".format(
                        group.feed, group.group, feed_name, group_name
                    )
                )
                continue

            meta = _group_download_start_metadata(group)
            record_count = 0
            try:
                self.local_repo.metadata.download_result.results.append(meta)
                self.local_repo.flush_metadata()
                logger.info(
                    "Downloading data for group {}/{}".format(group.feed, group.group)
                )
                with timer(
                    "data download for group {}/{}".format(group.feed, group.group),
                    log_level="info",
                ):
                    for count, group_metadata in self._fetch_group_data(group):
                        record_count += count
                        meta.total_records = record_count
                        meta.group_metadata.update(group_metadata)
                        self.local_repo.flush_metadata()
                _update_download_complete(meta, record_count)
            except Exception:
                logger.exception(
                    "Error downloading data for group {}/{}".format(
                        group.feed, group.group
                    )
                )
                # Ensure consistent state for next phase, so cleanup anything failed
                _update_download_failed(meta, record_count)
                groups_failed += 1
            finally:
                self.local_repo.flush_metadata()

        if groups_failed > 0:
            self.local_repo.metadata.download_result.status = (
                FeedDownloader.State.failed.value
            )
        else:
            self.local_repo.metadata.download_result.status = (
                FeedDownloader.State.complete.value
            )
    except Exception:
        logger.debug_exception(
            "Error fetching feed data, setting status to failed for operation {}".format(
                self.config.uuid
            )
        )
        self.local_repo.metadata.download_result.status = (
            FeedDownloader.State.failed.value
        )
        raise
    finally:
        logger.info("Feed data download process ending")
        self.local_repo.metadata.download_result.ended = datetime.datetime.utcnow()
        self.local_repo.flush_metadata()

    return self.local_repo
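# execute() above leans on a few module-level helpers whose bodies are not shown in this section.
# Their shape can be inferred from how the metadata objects are used (the operation record carries
# started/status/results as in the tests above; each per-group record carries feed, group,
# total_records, and group_metadata, and transitions to a terminal status). The sketches below
# follow those assumptions only; GroupDownloadResult and the exact fields are hypothetical, not
# the actual implementations.
def _download_start_metadata() -> DownloadOperationResult:
    # Mirrors the construction used in the tests above: the operation starts in_progress.
    return DownloadOperationResult(
        started=datetime.datetime.utcnow(),
        status=FeedDownloader.State.in_progress.value,
        results=[],
    )


def _group_download_start_metadata(group: GroupDownloadOperationConfiguration):
    # Hypothetical per-group record type; fields match the attributes execute() touches.
    return GroupDownloadResult(
        feed=group.feed,
        group=group.group,
        started=datetime.datetime.utcnow(),
        status=FeedDownloader.State.in_progress.value,
        total_records=0,
        group_metadata={},
    )


def _update_download_complete(meta, record_count):
    # Assumed terminal-state transition for a successfully downloaded group.
    meta.status = FeedDownloader.State.complete.value
    meta.ended = datetime.datetime.utcnow()
    meta.total_records = record_count


def _update_download_failed(meta, record_count):
    # Assumed terminal-state transition for a failed group; keeps the partial count.
    meta.status = FeedDownloader.State.failed.value
    meta.ended = datetime.datetime.utcnow()
    meta.total_records = record_count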
def get_cpe_vulnerabilities(self, image, nvd_cls: type, cpe_cls: type):
    with timer("Image vulnerability cpe lookups", log_level="debug"):
        return self.dedup_cpe_vulnerabilities(
            image.cpe_vulnerabilities(_nvd_cls=nvd_cls, _cpe_cls=cpe_cls)
        )
def test_feed_downloader():
    """
    Requires network access to the public feed service ancho.re

    :return:
    """
    groups_to_sync = [
        GroupDownloadOperationConfiguration(
            feed='vulnerabilities',
            group='alpine:3.7',
            parameters=GroupDownloadOperationParams(since=None)),
        GroupDownloadOperationConfiguration(
            feed='vulnerabilities',
            group='alpine:3.8',
            parameters=GroupDownloadOperationParams(since=None)),
        # GroupDownloadOperationConfiguration(feed='nvdv2', group='nvdv2:cves', parameters=GroupDownloadOperationParams(since=None))
    ]
    dl_conf = DownloadOperationConfiguration(groups=groups_to_sync,
                                             uuid=uuid.uuid4().hex,
                                             source_uri=ANCHOREIO_URI)
    tmpdir = tempfile.mkdtemp(prefix='anchoretest_repo-')
    data_repo = None
    try:
        client = get_client(ANCHOREIO_URI,
                            user=('something', 'something'),
                            conn_timeout=1,
                            read_timeout=30)
        fetcher = FeedDownloader(download_root_dir=tmpdir,
                                 config=dl_conf,
                                 client=client,
                                 fetch_all=False)
        with timer('feed download', log_level='info'):
            data_repo = fetcher.execute()

        assert data_repo is not None
        assert data_repo.root_dir.startswith(tmpdir)
        assert data_repo.metadata.data_write_dir.startswith(tmpdir)
        assert os.path.isdir(data_repo.metadata.data_write_dir)
        assert os.path.isdir(data_repo.root_dir)
        assert len(os.listdir(tmpdir)) > 0

        count = 0
        with timer('alpine 3.8 iterate', log_level='info'):
            for _ in data_repo.read('vulnerabilities', 'alpine:3.8', 0):
                count += 1

        assert count == sum([
            x.total_records
            for x in data_repo.metadata.download_result.results
            if x.feed == 'vulnerabilities' and x.group == 'alpine:3.8'
        ])

        with timer('alpine 3.7 iterate', log_level='info'):
            for _ in data_repo.read('vulnerabilities', 'alpine:3.7', 0):
                count += 1

        assert count == sum([
            x.total_records
            for x in data_repo.metadata.download_result.results
        ])
    finally:
        logger.info('Cleaning up temp dir')
        if data_repo:
            data_repo.teardown()
def test_feed_downloader():
    """
    Requires network access to the public feed service ancho.re

    :return:
    """
    groups_to_sync = [
        GroupDownloadOperationConfiguration(
            feed="vulnerabilities",
            group="alpine:3.7",
            parameters=GroupDownloadOperationParams(since=None),
        ),
        GroupDownloadOperationConfiguration(
            feed="vulnerabilities",
            group="alpine:3.8",
            parameters=GroupDownloadOperationParams(since=None),
        ),
        # GroupDownloadOperationConfiguration(feed='nvdv2', group='nvdv2:cves', parameters=GroupDownloadOperationParams(since=None))
    ]
    dl_conf = DownloadOperationConfiguration(
        groups=groups_to_sync, uuid=uuid.uuid4().hex, source_uri=ANCHOREIO_URI
    )
    tmpdir = tempfile.mkdtemp(prefix="anchoretest_repo-")
    data_repo = None
    try:
        client = get_feeds_client(
            SyncConfig(
                enabled=True,
                url=ANCHOREIO_URI,
                username="******",
                password="******",
                connection_timeout_seconds=1,
                read_timeout_seconds=30,
            )
        )
        fetcher = FeedDownloader(
            download_root_dir=tmpdir, config=dl_conf, client=client, fetch_all=False
        )
        with timer("feed download", log_level="info"):
            data_repo = fetcher.execute()

        assert data_repo is not None
        assert data_repo.root_dir.startswith(tmpdir)
        assert data_repo.metadata.data_write_dir.startswith(tmpdir)
        assert os.path.isdir(data_repo.metadata.data_write_dir)
        assert os.path.isdir(data_repo.root_dir)
        assert len(os.listdir(tmpdir)) > 0

        count = 0
        with timer("alpine 3.8 iterate", log_level="info"):
            for _ in data_repo.read("vulnerabilities", "alpine:3.8", 0):
                count += 1

        assert count == sum([
            x.total_records
            for x in data_repo.metadata.download_result.results
            if x.feed == "vulnerabilities" and x.group == "alpine:3.8"
        ])

        with timer("alpine 3.7 iterate", log_level="info"):
            for _ in data_repo.read("vulnerabilities", "alpine:3.7", 0):
                count += 1

        assert count == sum([
            x.total_records
            for x in data_repo.metadata.download_result.results
        ])
    finally:
        logger.info("Cleaning up temp dir")
        if data_repo:
            data_repo.teardown()
def execute(self):
    """
    Execute a load. Fetch from the catalog and send to loader.

    :return: the ImageLoadResult object including the image object and its vulnerabilities, or None if the image was already found
    """
    self.start_time = datetime.datetime.utcnow()
    try:
        db = get_session()
        img = db.query(Image).get((self.image_id, self.user_id))
        if img is not None:
            if not self.force_reload:
                logger.info(
                    "Image {}/{} already found in the system. Will not re-load.".format(
                        self.user_id, self.image_id
                    )
                )
                db.close()
                return None
            else:
                logger.info(
                    "Deleting image {}/{} and all associated resources for reload".format(
                        self.user_id, self.image_id
                    )
                )
                # for pkg_vuln in img.vulnerabilities():
                #     db.delete(pkg_vuln)
                db.delete(img)

        # Close the session during the data fetch.
        # db.close()

        image_obj = self._load_image_analysis()
        if not image_obj:
            logger.error("Could not load image analysis")
            raise ImageLoadError(
                "Failed to load image: user_id = {}, image_id = {}, fetch_url = {}".format(
                    self.user_id, self.image_id, self.fetch_url
                )
            )

        db = get_session()
        try:
            logger.info("Adding image to db")
            db.add(image_obj)

            with timer("Generating vulnerability matches", log_level="info"):
                get_vulnerabilities_provider().load_image(
                    image=image_obj,
                    db_session=db,
                    use_store=True,  # save results to cache
                )

            db.commit()
        except Exception:
            logger.exception("Error adding image to db")
            db.rollback()
            raise

        return ImageLoadResult(image_obj)
    except Exception:
        logger.exception(
            "Error loading and scanning image: {}".format(self.image_id)
        )
        raise
    finally:
        self.stop_time = datetime.datetime.utcnow()
def get_image_vulnerabilities(
    self,
    image: Image,
    db_session,
    vendor_only: bool = True,
    force_refresh: bool = False,
    cache: bool = True,
):
    # select the nvd class once and be done
    _nvd_cls, _cpe_cls = select_nvd_classes(db_session)

    # initialize the scanner
    scanner = self.__scanner__()

    user_id = image.user_id
    image_id = image.id

    results = []

    if force_refresh:
        log.info(
            "Forcing refresh of vulnerabilities for {}/{}".format(user_id, image_id)
        )
        try:
            scanner.flush_and_recompute_vulnerabilities(image, db_session=db_session)
            db_session.commit()
        except Exception:
            log.exception(
                "Error refreshing cve matches for image {}/{}".format(user_id, image_id)
            )
            db_session.rollback()
            return make_response_error(
                "Error refreshing vulnerability listing for image.",
                in_httpcode=500,
            )

        db_session = get_session()
        db_session.refresh(image)

    with timer("Image vulnerability primary lookup", log_level="debug"):
        vulns = scanner.get_vulnerabilities(image)

    # Has vulnerabilities?
    warns = []
    if not vulns:
        vulns = []
        ns = DistroNamespace.for_obj(image)
        if not have_vulnerabilities_for(ns):
            warns = [
                "No vulnerability data available for image distro: {}".format(
                    ns.namespace_name
                )
            ]

    rows = []
    with timer("Image vulnerability nvd metadata merge", log_level="debug"):
        vulns = merge_nvd_metadata_image_packages(db_session, vulns, _nvd_cls, _cpe_cls)

    with timer("Image vulnerability output formatting", log_level="debug"):
        for vuln, nvd_records in vulns:
            fixed_artifact = vuln.fixed_artifact()

            # Skip the vulnerability if the vendor_only flag is set to True and the issue won't be addressed by the vendor
            if vendor_only and vuln.fix_has_no_advisory(fixed_in=fixed_artifact):
                continue

            nvd_scores = [
                self._make_cvss_score(score)
                for nvd_record in nvd_records
                for score in nvd_record.get_cvss_scores_nvd()
            ]

            results.append(
                VulnerabilityMatch(
                    vulnerability=VulnerabilityModel(
                        vulnerability_id=vuln.vulnerability_id,
                        description="NA",
                        severity=vuln.vulnerability.severity,
                        link=vuln.vulnerability.link,
                        feed="vulnerabilities",
                        feed_group=vuln.vulnerability.namespace_name,
                        cvss_scores_nvd=nvd_scores,
                        cvss_scores_vendor=[],
                        created_at=vuln.vulnerability.created_at,
                        last_modified=vuln.vulnerability.updated_at,
                    ),
                    artifact=Artifact(
                        name=vuln.pkg_name,
                        version=vuln.package.fullversion,
                        pkg_type=vuln.pkg_type,
                        pkg_path=vuln.pkg_path,
                        cpe="None",
                        cpe23="None",
                    ),
                    fixes=[
                        FixedArtifact(
                            version=str(vuln.fixed_in(fixed_in=fixed_artifact)),
                            wont_fix=vuln.fix_has_no_advisory(fixed_in=fixed_artifact),
                            observed_at=fixed_artifact.fix_observed_at
                            if fixed_artifact
                            else None,
                        )
                    ],
                    match=Match(detected_at=vuln.created_at),
                )
            )

    # TODO move dedup here so api doesn't have to
    # cpe_vuln_listing = []
    try:
        with timer("Image vulnerabilities cpe matches", log_level="debug"):
            all_cpe_matches = scanner.get_cpe_vulnerabilities(image, _nvd_cls, _cpe_cls)

        if not all_cpe_matches:
            all_cpe_matches = []

        api_endpoint = self._get_api_endpoint()

        for image_cpe, vulnerability_cpe in all_cpe_matches:
            link = vulnerability_cpe.parent.link
            if not link:
                link = "{}/query/vulnerabilities?id={}".format(
                    api_endpoint, vulnerability_cpe.vulnerability_id
                )

            nvd_scores = [
                self._make_cvss_score(score)
                for score in vulnerability_cpe.parent.get_cvss_scores_nvd()
            ]
            vendor_scores = [
                self._make_cvss_score(score)
                for score in vulnerability_cpe.parent.get_cvss_scores_vendor()
            ]

            results.append(
                VulnerabilityMatch(
                    vulnerability=VulnerabilityModel(
                        vulnerability_id=vulnerability_cpe.parent.normalized_id,
                        description="NA",
                        severity=vulnerability_cpe.parent.severity,
                        link=link,
                        feed=vulnerability_cpe.feed_name,
                        feed_group=vulnerability_cpe.namespace_name,
                        cvss_scores_nvd=nvd_scores,
                        cvss_scores_vendor=vendor_scores,
                        created_at=vulnerability_cpe.parent.created_at,
                        last_modified=vulnerability_cpe.parent.updated_at,
                    ),
                    artifact=Artifact(
                        name=image_cpe.name,
                        version=image_cpe.version,
                        pkg_type=image_cpe.pkg_type,
                        pkg_path=image_cpe.pkg_path,
                        cpe=image_cpe.get_cpestring(),
                        cpe23=image_cpe.get_cpe23string(),
                    ),
                    fixes=[
                        FixedArtifact(
                            version=item,
                            wont_fix=False,
                            observed_at=vulnerability_cpe.created_at,
                        )
                        for item in vulnerability_cpe.get_fixed_in()
                    ],
                    # using vulnerability created_at to indicate the match timestamp for now
                    match=Match(detected_at=vulnerability_cpe.created_at),
                )
            )
    except Exception:
        log.exception("could not fetch CPE matches")

    import uuid

    return ImageVulnerabilitiesReport(
        account_id=image.user_id,
        image_id=image_id,
        results=results,
        metadata=VulnerabilitiesReportMetadata(
            generated_at=datetime.datetime.utcnow(),
            uuid=str(uuid.uuid4()),
            generated_by=self._get_provider_metadata(),
        ),
        problems=[],
    )
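# Hypothetical caller-side sketch tying the pieces above together: look up an image row in a
# session (same pattern as the load/refresh tasks above) and ask the provider for its report.
# `provider`, `account_id`, and `image_id` are illustrative names; only the method signature
# comes from the code above.
with session_scope() as session:
    img = session.query(Image).get((image_id, account_id))
    if img:
        report = provider.get_image_vulnerabilities(
            image=img,
            db_session=session,
            vendor_only=True,     # drop matches the vendor has said it won't fix
            force_refresh=False,  # reuse previously computed matches
        )
        log.info("Found %s matches", len(report.results))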