class VulnDBFeed(AnchoreServiceFeed):
    """
    Feed for VulnDB data served from an on-prem enterprise feed service
    """

    __feed_name__ = "vulndb"
    _cve_key = "id"
    __group_data_mappers__ = SingleTypeMapperFactory(
        __feed_name__, VulnDBFeedDataMapper, _cve_key
    )

    def _flush_group(self, group_obj, flush_helper_fn=None, operation_id=None):
        db = get_session()

        if flush_helper_fn:
            flush_helper_fn(
                db=db, feed_name=group_obj.feed_name, group_name=group_obj.name
            )

        count = (
            db.query(VulnDBCpe)
            .filter(VulnDBCpe.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} VulnDBCpe records".format(count),
            )
        )

        count = (
            db.query(VulnDBMetadata)
            .filter(VulnDBMetadata.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} VulnDBMetadata records".format(count),
            )
        )

        group_obj.last_sync = None
        group_obj.count = 0
        db.flush()

    def record_count(self, group_name, db):
        try:
            return (
                db.query(VulnDBMetadata)
                .filter(VulnDBMetadata.namespace_name == group_name)
                .count()
            )
        except Exception:
            logger.exception(
                "Error getting feed data group record count in vulndb feed for group: {}".format(
                    group_name
                )
            )
            raise
class NvdV2Feed(AnchoreServiceFeed):
    """
    Feed for NVD CVE data served from the anchore feed service backend
    """

    __feed_name__ = "nvdv2"
    _cve_key = "id"
    __group_data_mappers__ = SingleTypeMapperFactory(
        __feed_name__, NvdV2FeedDataMapper, _cve_key
    )

    def _flush_group(self, group_obj, flush_helper_fn=None, operation_id=None):
        db = get_session()

        if flush_helper_fn:
            flush_helper_fn(
                db=db, feed_name=group_obj.feed_name, group_name=group_obj.name
            )

        count = (
            db.query(CpeV2Vulnerability)
            .filter(CpeV2Vulnerability.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} CpeV2Vuln records".format(count),
            )
        )

        count = (
            db.query(NvdV2Metadata)
            .filter(NvdV2Metadata.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} NvdV2 records".format(count),
            )
        )

        group_obj.last_sync = None
        group_obj.count = 0
        db.flush()

    def record_count(self, group_name, db):
        try:
            return (
                db.query(NvdV2Metadata)
                .filter(NvdV2Metadata.namespace_name == group_name)
                .count()
            )
        except Exception:
            logger.exception(
                "Error getting feed data group record count in nvdv2 feed for group: {}".format(
                    group_name
                )
            )
            raise
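# ---------------------------------------------------------------------------
# Illustrative sketch only, not part of this module's runtime path: a minimal,
# self-contained demo of the bulk-delete flush pattern shared by
# VulnDBFeed._flush_group and NvdV2Feed._flush_group above. Query.delete()
# issues a single bulk DELETE scoped to the group's namespace and returns the
# affected row count, which the feeds log before zeroing the group's sync
# metadata in the same transaction. _DemoBase and _DemoCpe are hypothetical
# stand-ins for the real ORM models, not anchore classes.
# ---------------------------------------------------------------------------
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

_DemoBase = declarative_base()


class _DemoCpe(_DemoBase):  # hypothetical stand-in for VulnDBCpe / CpeV2Vulnerability
    __tablename__ = "demo_cpe"
    id = Column(Integer, primary_key=True)
    namespace_name = Column(String)


def _flush_demo_group(db, group_name):
    # Bulk delete returns the number of rows removed
    count = db.query(_DemoCpe).filter(_DemoCpe.namespace_name == group_name).delete()
    db.flush()
    return count


if __name__ == "__main__":
    _engine = create_engine("sqlite://")
    _DemoBase.metadata.create_all(_engine)
    _db = sessionmaker(bind=_engine)()
    _db.add_all([_DemoCpe(namespace_name="demo:group") for _ in range(3)])
    _db.commit()
    print(_flush_demo_group(_db, "demo:group"))  # -> 3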
class VulnerabilityFeed(AnchoreServiceFeed):
    """
    Vulnerabilities feed from the anchore feed service backend. Unique in that the records are nested and have structure.
    Each vulnerability record maps to a set of records in the DB: one for the vulnerability and a set for each of the
    FixedIn and VulnerableIn collections that are optionally present for the vulnerability main record.
    """

    __feed_name__ = "vulnerabilities"
    _cve_key = "Name"
    __group_data_mappers__ = SingleTypeMapperFactory(
        __feed_name__, VulnerabilityFeedDataMapper, _cve_key
    )
    __vuln_processing_fn__ = process_updated_vulnerability
    __flush_helper_fn__ = flush_vulnerability_matches

    def _sync_group(
        self,
        group_download_result: GroupDownloadResult,
        full_flush=False,
        local_repo=None,
        operation_id=None,
    ):
        """
        Sync data from a single group and return the data. This operation is scoped to a transaction on the db.

        :param group_download_result:
        :return:
        """
        total_updated_count = 0
        result = build_group_sync_result()
        result["group"] = group_download_result.group
        sync_started = None

        db = get_session()
        db.refresh(self.metadata)
        group_db_obj = self.group_by_name(group_download_result.group)

        if not group_db_obj:
            logger.error(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Skipping group sync. Record not found in db, should have been synced already",
                )
            )
            return result

        sync_started = time.time()
        download_started = group_download_result.started.replace(
            tzinfo=datetime.timezone.utc
        )

        try:
            # Track the unique set of all images updated by this sync
            updated_images = set()

            if full_flush:
                logger.info(
                    log_msg_ctx(
                        operation_id,
                        group_download_result.feed,
                        group_download_result.group,
                        "Performing group data flush prior to sync",
                    )
                )
                self._flush_group(group_db_obj, operation_id=operation_id)

            mapper = self._load_mapper(group_db_obj)

            # Iterate through the records and commit in chunks
            count = 0
            for record in local_repo.read(
                group_download_result.feed, group_download_result.group, 0
            ):
                mapped = mapper.map(record)
                updated_image_ids = self.update_vulnerability(
                    db,
                    mapped,
                    vulnerability_processing_fn=VulnerabilityFeed.__vuln_processing_fn__,
                )
                # Record after commit to ensure in-sync
                updated_images = updated_images.union(set(updated_image_ids))
                merged = db.merge(mapped)
                total_updated_count += 1
                count += 1

                if len(updated_image_ids) > 0:
                    # Flush after every one so that mem footprint stays small if lots of images are updated
                    db.flush()

                if count >= self.RECORDS_PER_CHUNK:
                    # Commit the chunk
                    group_db_obj.count = self.record_count(group_db_obj.name, db)
                    db.commit()
                    logger.info(
                        log_msg_ctx(
                            operation_id,
                            group_download_result.feed,
                            group_download_result.group,
                            "DB Update Progress: {}/{}".format(
                                total_updated_count,
                                group_download_result.total_records,
                            ),
                        )
                    )
                    db = get_session()
                    count = 0
            else:
                # for/else: commit the final partial chunk once the loop completes
                group_db_obj.count = self.record_count(group_db_obj.name, db)
                db.commit()
                logger.info(
                    log_msg_ctx(
                        operation_id,
                        group_download_result.feed,
                        group_download_result.group,
                        "DB Update Progress: {}/{}".format(
                            total_updated_count, group_download_result.total_records
                        ),
                    )
                )
                db = get_session()

            logger.debug(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Updating last sync timestamp to {}".format(download_started),
                )
            )
            group_db_obj = self.group_by_name(group_download_result.group)
            group_db_obj.last_sync = download_started
            group_db_obj.count = self.record_count(group_db_obj.name, db)
            db.add(group_db_obj)
            db.commit()
        except Exception:
            logger.exception(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Error syncing group",
                )
            )
            db.rollback()
            raise
        finally:
            total_group_time = time.time() - download_started.timestamp()
            sync_time = time.time() - sync_started
            logger.info(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Sync to db duration: {} sec".format(sync_time),
                )
            )
            logger.info(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Total sync, including download, duration: {} sec".format(
                        total_group_time
                    ),
                )
            )

        result["updated_record_count"] = total_updated_count
        result["status"] = "success"
        result["total_time_seconds"] = total_group_time
        result["updated_image_count"] = len(updated_images)
        return result

    @staticmethod
    def _are_match_equivalent(vulnerability_a, vulnerability_b):
        """
        Returns true if the two records (including child fixedin and/or vulnerablein records) are equivalent in terms of package matching.

        TODO: move this logic to a vuln-scan abstraction, but that abstraction needs more work before it's ready.
        Would like to keep the definition of what impacts matches centralized so as not to get out-of-sync.

        :param vulnerability_a:
        :param vulnerability_b:
        :return:
        """
        if not (vulnerability_a and vulnerability_b):
            # One of the records is missing; they can't be equivalent
            return False

        if (
            vulnerability_a.id != vulnerability_b.id
            or vulnerability_a.namespace_name != vulnerability_b.namespace_name
        ):
            # They aren't the same item reference
            logger.debug(
                "Vuln id or namespaces are different: {} {} {} {}".format(
                    vulnerability_a.id,
                    vulnerability_b.id,
                    vulnerability_a.namespace_name,
                    vulnerability_b.namespace_name,
                )
            )
            return False

        normalized_fixes_a = {
            (fix.name, fix.epochless_version, fix.version)
            for fix in vulnerability_a.fixed_in
        }
        normalized_fixes_b = {
            (fix.name, fix.epochless_version, fix.version)
            for fix in vulnerability_b.fixed_in
        }

        fix_diff = normalized_fixes_a.symmetric_difference(normalized_fixes_b)
        if fix_diff:
            logger.debug("Fixed In records diff: {}".format(fix_diff))
            return False

        return True

    def update_vulnerability(
        self, db, vulnerability_record, vulnerability_processing_fn=None
    ):
        """
        Processes a single vulnerability record.
        Specifically for vulnerabilities: checks and updates any fixed-in or vulnerable-in records and, given the final
        state of the vulnerability, calls the item_callback function which is expected to do things like: update image
        vulnerability lists based on the new item.

        :param vulnerability_record: the record from the feed source to process and load into the db
        :param vulnerability_processing_fn: a callback function to execute with the new data, but before any transaction commit
        :return:
        """
        try:
            updates = []

            try:
                existing = (
                    db.query(Vulnerability)
                    .filter(
                        Vulnerability.id == vulnerability_record.id,
                        Vulnerability.namespace_name
                        == vulnerability_record.namespace_name,
                    )
                    .one_or_none()
                )
            except Exception:
                logger.debug(
                    "No current record found for {}".format(vulnerability_record)
                )
                existing = None

            if existing:
                needs_update = not VulnerabilityFeed._are_match_equivalent(
                    existing, vulnerability_record
                )
                if needs_update:
                    logger.debug(
                        "Found update that requires an image match update from {} to {}".format(
                            existing, vulnerability_record
                        )
                    )
            else:
                needs_update = True

            merged = db.merge(vulnerability_record)

            if vulnerability_processing_fn and needs_update:
                updates = vulnerability_processing_fn(db, merged)
            else:
                logger.debug(
                    "Skipping image processing due to no diff: {}".format(merged)
                )

            return updates
        except Exception:
            logger.exception("Error in vulnerability processing")
            raise

    def _flush_group(self, group_obj, operation_id=None):
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushing group records",
            )
        )

        db = get_session()

        VulnerabilityFeed.__flush_helper_fn__(
            db=db, feed_name=group_obj.feed_name, group_name=group_obj.name
        )

        count = (
            db.query(FixedArtifact)
            .filter(FixedArtifact.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} fix records".format(count),
            )
        )

        count = (
            db.query(VulnerableArtifact)
            .filter(VulnerableArtifact.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} vulnerable artifact records".format(count),
            )
        )

        count = (
            db.query(Vulnerability)
            .filter(Vulnerability.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} vulnerability records".format(count),
            )
        )

        group_obj.last_sync = None  # Null the update timestamp to reflect the flush
        group_obj.count = 0
        db.flush()

    def sync(
        self,
        fetched_data: LocalFeedDataRepo,
        full_flush: bool = False,
        event_client: CatalogClient = None,
        operation_id=None,
        group=None,
    ) -> dict:
        """
        Sync data with the feed source. This may be *very* slow if there are lots of updates.

        Returns a dict with the following structure:
        {
            'group_name': [record1, record2, ..., recordN],
            'group_name2': [record1, record2, ..., recordM],
            ...
        }

        :param group: The group to sync, optionally. If not specified, all groups are synced.
        :return: changed data updated in the sync as a list of records
        """
        if self.metadata and self.metadata.groups:
            # Setup the group name cache
            ThreadLocalFeedGroupNameCache.add(
                [(x.name, x.enabled) for x in self.metadata.groups]
            )
        else:
            ThreadLocalFeedGroupNameCache.flush()

        try:
            return super().sync(
                fetched_data,
                full_flush,
                event_client,
                operation_id=operation_id,
                group=group,
            )
        finally:
            ThreadLocalFeedGroupNameCache.flush()

    def record_count(self, group_name, db):
        try:
            return (
                db.query(Vulnerability)
                .filter(Vulnerability.namespace_name == group_name)
                .count()
            )
        except Exception:
            logger.exception(
                "Error getting feed data group record count in vulnerabilities feed for group: {}".format(
                    group_name
                )
            )
            raise


class GithubFeed(VulnerabilityFeed):
    """
    Feed for the Github Advisories data
    """

    __feed_name__ = "github"
    _cve_key = "id"
    __group_data_mappers__ = SingleTypeMapperFactory(
        __feed_name__, GithubFeedDataMapper, _cve_key
    )
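# ---------------------------------------------------------------------------
# Illustrative sketch only: the chunked-commit pattern used by
# VulnerabilityFeed._sync_group above. Records are written in batches of
# RECORDS_PER_CHUNK and committed between batches, so a large group sync
# neither runs in one giant transaction nor accumulates every pending row in
# session memory. `records` and `save_record` are hypothetical stand-ins for
# the repo reader and the mapper/merge step.
# ---------------------------------------------------------------------------
def _demo_chunked_commit(db, records, save_record, chunk_size=500):
    count = 0
    total = 0
    for record in records:
        save_record(db, record)  # e.g. db.merge(...) on the mapped ORM object
        count += 1
        total += 1
        if count >= chunk_size:
            db.commit()  # bound transaction size and session memory per chunk
            count = 0
    else:
        # for/else: runs once the loop completes; commits the final partial chunk
        db.commit()
    return total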
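# ---------------------------------------------------------------------------
# Illustrative sketch only: the equivalence test behind
# VulnerabilityFeed._are_match_equivalent above. Child fix records are
# normalized into hashable tuples and compared with set
# symmetric_difference(); any element unique to either side means the update
# can change package matching, so image vulnerability lists must be
# re-evaluated. The dataclasses are hypothetical stand-ins for the real ORM
# models.
# ---------------------------------------------------------------------------
from dataclasses import dataclass, field
from typing import List


@dataclass
class _DemoFix:  # stand-in for FixedArtifact
    name: str
    epochless_version: str
    version: str


@dataclass
class _DemoVuln:  # stand-in for Vulnerability
    id: str
    namespace_name: str
    fixed_in: List[_DemoFix] = field(default_factory=list)


def _demo_match_equivalent(a: _DemoVuln, b: _DemoVuln) -> bool:
    if a.id != b.id or a.namespace_name != b.namespace_name:
        return False  # not the same logical record
    fixes_a = {(f.name, f.epochless_version, f.version) for f in a.fixed_in}
    fixes_b = {(f.name, f.epochless_version, f.version) for f in b.fixed_in}
    # An empty symmetric difference means no match-relevant change
    return not fixes_a.symmetric_difference(fixes_b)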
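# ---------------------------------------------------------------------------
# Illustrative sketch only: the merge-then-callback upsert from
# VulnerabilityFeed.update_vulnerability above. session.merge() inserts or
# updates by primary key; the processing callback runs only when the incoming
# record would change package matching, and before the surrounding commit so
# derived image data stays consistent with the vulnerability row.
# `is_equivalent` and `processing_fn` are hypothetical parameters standing in
# for _are_match_equivalent and __vuln_processing_fn__.
# ---------------------------------------------------------------------------
def _demo_upsert(db, record, is_equivalent, processing_fn=None):
    # Look up the current row by its composite identity
    existing = (
        db.query(type(record))
        .filter_by(id=record.id, namespace_name=record.namespace_name)
        .one_or_none()
    )
    # Only re-process matches when the new record actually changes them
    needs_update = existing is None or not is_equivalent(existing, record)
    merged = db.merge(record)  # insert-or-update by primary key
    if processing_fn and needs_update:
        return processing_fn(db, merged)  # e.g. recompute image matches
    return []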