def sync_from_fetched(fetched_repo: LocalFeedDataRepo, catalog_client: CatalogClient = None, operation_id=None, full_flush=False):
    """
    Sync the data from a local fetched repo

    :param fetched_repo: local repo of already-downloaded feed data to load into the database
    :param catalog_client: catalog client used to emit sync events
    :param operation_id: identifier correlating log messages for this sync operation
    :param full_flush: if True, flush existing records before syncing
    :return: list of feed sync result dicts, one per feed found in the fetched data
    """
    # Load the feed objects
    if not (fetched_repo.metadata and fetched_repo.metadata.download_result and fetched_repo.metadata.download_result.results):
        raise ValueError('Fetched repo has no download result records')
    else:
        feed_objs = [feed_instance_by_name(f) for f in set([x.feed for x in fetched_repo.metadata.download_result.results])]

    result = []
    for f in feed_objs:
        try:
            t = time.time()
            try:
                log.info('Syncing downloaded feed data into database (operation_id={})'.format(operation_id))
                # Do the sync from the local data
                result.append(f.sync(fetched_data=fetched_repo, event_client=catalog_client, operation_id=operation_id, full_flush=full_flush))
            except Exception:
                log.exception('Failure updating the {} feed from downloaded data (operation_id={})'.format(f.__feed_name__, operation_id))
                fail_result = build_feed_sync_results(feed=f.__feed_name__)
                fail_result['total_time_seconds'] = time.time() - t
                result.append(fail_result)
        except Exception:
            log.exception('Error syncing feed {} (operation_id={})'.format(f.__feed_name__, operation_id))
            raise

    return result
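# A minimal usage sketch for sync_from_fetched, assuming the FeedDownloader /
# DownloadOperationConfiguration flow shown in sync() further below. The names
# `my_client`, `my_group`, `op_id`, and the download directory are hypothetical
# stand-ins, not values defined in this module.
def example_sync_one_group(my_client, my_group, op_id):
    config = DownloadOperationConfiguration.generate_new(my_client.feed_url, db_groups_to_sync=[my_group])
    downloader = FeedDownloader(download_root_dir='/tmp/feed_syncs', config=config, client=my_client, fetch_all=False)
    repo = downloader.execute(feed_name=my_group.feed_name, group_name=my_group.name)
    try:
        # Each entry in the returned list is a per-feed sync result dict
        return sync_from_fetched(repo, catalog_client=None, operation_id=op_id)
    finally:
        repo.teardown()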
def update_counts():
    for feed in get_all_feeds_detached():
        try:
            f = feed_instance_by_name(feed.name)
            f.update_counts()
        except KeyError:
            logger.warn('Could not find feed instance for name %s. Cannot update counts', feed.name)
def evaluate(self, image_obj, context):
    try:
        feed_meta = feed_instance_by_name('packages').group_by_name(FEED_KEY)
        if feed_meta and feed_meta.last_sync:
            return
    except Exception:
        log.exception('Error determining feed presence for gems. Defaulting to firing trigger')

    self._fire()
    return
def delete_feed(feed_name):
    """
    Flush and delete all data for the named feed

    :param feed_name: name of the feed to delete
    :return: result of the feed flush
    """
    f = feed_instance_by_name(feed_name)
    if not f:
        raise KeyError(feed_name)

    return f.flush_all()
def delete_feed_group(feed_name, group_name):
    """
    Flush and delete all data for the named group within the feed

    :param feed_name: name of the feed containing the group
    :param group_name: name of the group to delete
    :return: result of the group flush
    """
    # TODO throw exception if feed is grypedb
    f = feed_instance_by_name(feed_name)
    if not f:
        raise KeyError(feed_name)

    return f.flush_group(group_name)
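# A small illustrative caller (hypothetical, not part of this module) showing
# how delete_feed and delete_feed_group surface unknown names: both raise
# KeyError, so a caller can map that to a not-found response.
def example_delete(feed_name, group_name=None):
    try:
        if group_name:
            return delete_feed_group(feed_name, group_name)
        return delete_feed(feed_name)
    except KeyError:
        log.debug('no such feed/group: {}/{}'.format(feed_name, group_name))
        return None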
def test_group_lookups(test_data_env):
    r = DataFeeds.sync_metadata(feed_client=test_data_env.feed_client)
    assert r == empty_metadata_sync_result, 'No metadata should be returned from sync with empty to_sync input'

    r = DataFeeds.sync_metadata(feed_client=test_data_env.feed_client, to_sync=['vulnerabilities'])
    assert r and len(r[0]) == 1, 'Metadata should be returned from sync with non-empty to_sync list'

    df = feed_instance_by_name('vulnerabilities')
    assert df is not None, 'vulnerabilities feed instance not loaded'
    assert df.metadata, 'No vuln metadata found'
    logger.info('Vuln feed metadata {}'.format(df.metadata.to_json()))
    assert not df.group_by_name('not_a_real_Group'), 'Found non-existent group'
    assert df.group_by_name('alpine:3.6'), 'Should have found group alpine:3.6'
def sync(to_sync=None, full_flush=False, catalog_client=None, feed_client=None, operation_id=None):
    """
    Sync all feeds.

    :return: list of feed sync result dicts, one per feed
    """
    result = []

    if not feed_client:
        feed_client = get_client()

    logger.info('Performing sync of feeds: {} (operation_id={})'.format('all' if to_sync is None else to_sync, operation_id))

    updated, failed = DataFeeds.sync_metadata(feed_client=feed_client, to_sync=to_sync, operation_id=operation_id)
    updated_names = set(updated.keys())

    # Feeds configured to sync but that were not on the upstream source at all.
    # to_sync=None means "all feeds", so nothing was explicitly requested
    for feed_name in set(to_sync or []).difference(updated_names):
        failed.append((feed_name, 'Feed not found on upstream source'))

    # Build the list of feed instances to execute the syncs on
    feeds_to_sync = []
    for feed_name in updated_names:
        try:
            feeds_to_sync.append(feed_instance_by_name(feed_name))
        except KeyError as e:
            logger.error('Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'.format(feed_name, str(e), operation_id))
            failed.append((feed_name, e))
        except Exception as e:
            logger.error('Could not initialize metadata for feed {}. Error: {}. (operation_id={})'.format(feed_name, str(e), operation_id))
            logger.warn('Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'.format(feed_name, operation_id))
            failed.append((feed_name, e))

    # Process the feeds that failed for any reason pre-data-download
    result.extend(DataFeeds._process_failed_feeds(failed_tuples=failed, catalog_client=catalog_client, operation_id=operation_id))

    # Sort the feed instances for the syncing process to ensure highest priority feeds
    # sync first (e.g. vulnerabilities before package metadata)
    feeds_to_sync = _ordered_feeds(feeds_to_sync)

    # Do the fetches
    groups_to_download = []
    for f in feeds_to_sync:
        logger.info('Initialized feed to sync: {} (operation_id={})'.format(f.__feed_name__, operation_id))
        if f.metadata:
            if f.metadata.enabled:
                for g in f.metadata.groups:
                    if g.enabled:
                        groups_to_download.append(g)
                    else:
                        logger.info("Will not sync/download group {} of feed {} because group is explicitly disabled".format(g.name, g.feed_name))
            else:
                logger.info('Skipping feed {} because it is explicitly not enabled'.format(f.__feed_name__))
        else:
            logger.warn('No metadata found for feed {}. Unexpected but not an error (operation_id={})'.format(f.__feed_name__, operation_id))

    logger.debug('Groups to download {}'.format(groups_to_download))

    base_dir = DataFeeds.__scratch_dir__ if DataFeeds.__scratch_dir__ else localconfig.get_config().get('tmp_dir')
    download_dir = os.path.join(base_dir, 'policy_engine_tmp', 'feed_syncs')
    feed_data_repo = None
    try:
        # Order by feed
        for f in feeds_to_sync:
            # Build the result template with a failure default, then set success
            # optimistically; any group failure below flips it back to failure
            feed_result = build_feed_sync_results(feed=f.__feed_name__, status='failure')
            feed_result['status'] = 'success'
            try:
                # Feed level notification and log msg
                notify_event(FeedSyncStarted(feed=f.__feed_name__), catalog_client, operation_id=operation_id)

                # Filter groups by that feed
                groups_to_sync = [x for x in groups_to_download if x.feed_name == f.__feed_name__]
                logger.debug('Groups to sync {}'.format(groups_to_sync))

                for g in groups_to_sync:
                    # Download just one group into a download result
                    group_download_config = DownloadOperationConfiguration.generate_new(feed_client.feed_url, db_groups_to_sync=[g])
                    downloader = FeedDownloader(download_root_dir=download_dir, config=group_download_config, client=feed_client, fetch_all=full_flush)
                    logger.debug('Groups to download {}'.format(downloader.config.groups))
                    try:
                        notify_event(FeedGroupSyncStarted(feed=g.feed_name, group=g.name), catalog_client, operation_id=operation_id)
                        logger.info('Beginning feed data fetch (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))
                        feed_data_repo = downloader.execute(feed_name=g.feed_name, group_name=g.name)

                        logger.info('Download complete. Syncing to db (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))
                        f_result = DataFeeds.sync_from_fetched(feed_data_repo, catalog_client=catalog_client, operation_id=operation_id, full_flush=full_flush)

                        # Extract the single group record...
                        group_result = _get_group_result(f_result)

                        logger.info('DB Sync complete (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))

                        if group_result['status'] == 'success':
                            notify_event(FeedGroupSyncCompleted(feed=f.__feed_name__, group=g.name, result=group_result), catalog_client, operation_id=operation_id)
                        else:
                            # If any group fails, the whole feed is marked as failed
                            feed_result['status'] = 'failure'
                            notify_event(FeedGroupSyncFailed(feed=f.__feed_name__, group=g.name, error='Failed to sync to db'), catalog_client, operation_id=operation_id)

                        feed_result['groups'].append(group_result)
                    except Exception as e:
                        logger.error('Error syncing {}/{} (operation_id={})'.format(g.feed_name, g.name, operation_id))
                        notify_event(FeedGroupSyncFailed(feed=g.feed_name, group=g.name, error=e), catalog_client, operation_id)
                        feed_result['status'] = 'failure'
                    finally:
                        try:
                            feed_data_repo.teardown()
                        except Exception:
                            logger.exception('Could not cleanup download repo due to error')

                        feed_data_repo = None
            except Exception:
                logger.error('Error syncing {} (operation_id={})'.format(f, operation_id))

            if feed_result['status'] == 'success':
                notify_event(FeedSyncCompleted(feed=f.__feed_name__), catalog_client, operation_id)
            else:
                notify_event(FeedSyncFailed(feed=f.__feed_name__, error='One or more groups failed to sync'), catalog_client, operation_id)

            result.append(feed_result)
    finally:
        if feed_data_repo:
            feed_data_repo.teardown()

    return result
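# Hedged example of driving the legacy sync() above for a subset of feeds.
# The feed names passed in are illustrative, and this assumes
# build_feed_sync_results records the feed name under a 'feed' key, which is
# suggested but not shown by the code above.
def example_run_legacy_sync():
    results = sync(to_sync=['vulnerabilities', 'packages'], full_flush=False)
    for feed_result in results:
        if feed_result['status'] != 'success':
            logger.warn('feed {} failed to sync'.format(feed_result.get('feed')))
    return results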
def records_for(feed_name, group_name):
    try:
        return feed_instance_by_name(feed_name).record_count(group_name)
    except KeyError as e:
        log.debug('cannot compute record count for unknown feed: {}'.format(e))
        return 0
def sync(
    sync_util_provider: SyncUtilProvider,
    full_flush: bool = False,
    catalog_client: Optional[CatalogClient] = None,
    operation_id: Optional[str] = None,
) -> List[FeedSyncResult]:
    """
    Sync all feeds.

    :param sync_util_provider: provider for sync utils (switches logic for legacy / grypedb feeds)
    :type sync_util_provider: SyncUtilProvider
    :param full_flush: whether or not to flush out the existing records before sync
    :type full_flush: bool
    :param catalog_client: catalog client
    :type catalog_client: CatalogClient
    :param operation_id: UUID4 hexadecimal string representing this operation
    :type operation_id: Optional[str]
    :return: list of FeedSyncResult
    :rtype: List[FeedSyncResult]
    """
    result = []
    to_sync = sync_util_provider.to_sync
    if not to_sync:
        return result

    feed_client = sync_util_provider.get_client()

    logger.info(
        "Performing sync of feeds: {} (operation_id={})".format(
            "all" if to_sync is None else to_sync, operation_id
        )
    )

    source_feeds = DataFeeds.get_feed_group_information(feed_client, to_sync)
    updated, failed = sync_util_provider.sync_metadata(source_feeds, operation_id)
    updated_names = set(updated.keys())

    # Feeds configured to sync but that were not on the upstream source at all
    for feed_name in set(to_sync).difference(updated_names):
        failed.append((feed_name, "Feed not found on upstream source"))

    # Build the list of feed instances to execute the syncs on
    feeds_to_sync = []
    for feed_name in updated_names:
        try:
            feeds_to_sync.append(feed_instance_by_name(feed_name))
        except KeyError as e:
            logger.error(
                "Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})".format(
                    feed_name, str(e), operation_id
                )
            )
            failed.append((feed_name, e))
        except Exception as e:
            logger.error(
                "Could not initialize metadata for feed {}. Error: {}. (operation_id={})".format(
                    feed_name, str(e), operation_id
                )
            )
            logger.warn(
                "Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})".format(
                    feed_name, operation_id
                )
            )
            failed.append((feed_name, e))

    # Process the feeds that failed for any reason pre-data-download
    result.extend(
        DataFeeds._process_failed_feeds(
            failed_tuples=failed,
            catalog_client=catalog_client,
            operation_id=operation_id,
        )
    )

    # Sort the feed instances for the syncing process to ensure highest priority feeds
    # sync first (e.g. vulnerabilities before package metadata)
    feeds_to_sync = _ordered_feeds(feeds_to_sync)

    groups_to_download = sync_util_provider.get_groups_to_download(
        source_feeds, feeds_to_sync, operation_id
    )
    logger.debug("Groups to download {}".format(groups_to_download))

    base_dir = (
        DataFeeds.__scratch_dir__
        if DataFeeds.__scratch_dir__
        else localconfig.get_config().get("tmp_dir")
    )
    download_dir = os.path.join(base_dir, "policy_engine_tmp", "feed_syncs")
    feed_data_repo = None
    try:
        # Order by feed
        for f in feeds_to_sync:
            feed_result = FeedSyncResult(feed=f.__feed_name__, status="success")
            try:
                # Feed level notification and log msg
                notify_event(
                    FeedSyncStarted(feed=f.__feed_name__),
                    catalog_client,
                    operation_id=operation_id,
                )

                # Filter groups by that feed
                groups_to_sync = [
                    x for x in groups_to_download if x.feed_name == f.__feed_name__
                ]
                logger.debug("Groups to sync {}".format(groups_to_sync))

                for g in groups_to_sync:
                    # Download just one group into a download result
                    group_download_config = download_operation_config_factory(
                        feed_client.feed_url, db_groups_to_sync=[g]
                    )
                    downloader = FeedDownloader(
                        download_root_dir=download_dir,
                        config=group_download_config,
                        client=feed_client,
                        fetch_all=full_flush,
                    )
                    logger.debug(
                        "Groups to download {}".format(downloader.config.groups)
                    )
                    try:
                        notify_event(
                            FeedGroupSyncStarted(feed=g.feed_name, group=g.name),
                            catalog_client,
                            operation_id=operation_id,
                        )
                        logger.info(
                            "Beginning feed data fetch (feed={}, group={}, operation_id={})".format(
                                g.feed_name, g.name, operation_id
                            )
                        )
                        feed_data_repo = downloader.execute(
                            feed_name=g.feed_name, group_name=g.name
                        )

                        logger.info(
                            "Download complete. Syncing to db (feed={}, group={}, operation_id={})".format(
                                g.feed_name, g.name, operation_id
                            )
                        )
                        f_result = DataFeeds.sync_from_fetched(
                            feed_data_repo,
                            catalog_client=catalog_client,
                            operation_id=operation_id,
                            full_flush=full_flush,
                        )

                        # Extract the single group record...
                        group_result = sync_util_provider.retrieve_group_result(
                            f_result, g
                        )

                        logger.info(
                            "DB Sync complete (feed={}, group={}, operation_id={})".format(
                                g.feed_name, g.name, operation_id
                            )
                        )

                        if group_result.status == "success":
                            notify_event(
                                FeedGroupSyncCompleted(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    result=asdict(group_result),
                                ),
                                catalog_client,
                                operation_id=operation_id,
                            )
                        else:
                            # If any group fails, the whole feed is marked as failed
                            feed_result.status = "failure"
                            notify_event(
                                FeedGroupSyncFailed(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    error="Failed to sync to db",
                                ),
                                catalog_client,
                                operation_id=operation_id,
                            )

                        sync_util_provider.update_feed_result(
                            feed_result, f_result, group_result
                        )
                    except Exception as e:
                        logger.error(
                            "Error syncing {}/{} (operation_id={})".format(
                                g.feed_name, g.name, operation_id
                            )
                        )
                        notify_event(
                            FeedGroupSyncFailed(
                                feed=g.feed_name, group=g.name, error=e
                            ),
                            catalog_client,
                            operation_id,
                        )
                        feed_result.status = "failure"
                    finally:
                        try:
                            feed_data_repo.teardown()
                        except Exception:
                            logger.exception(
                                "Could not cleanup download repo due to error"
                            )

                        feed_data_repo = None
            except Exception:
                logger.exception(
                    "Error syncing {} (operation_id={})".format(f, operation_id)
                )

            if feed_result.status == "success":
                notify_event(
                    FeedSyncCompleted(feed=f.__feed_name__),
                    catalog_client,
                    operation_id,
                )
            else:
                notify_event(
                    FeedSyncFailed(
                        feed=f.__feed_name__,
                        error="One or more groups failed to sync",
                    ),
                    catalog_client,
                    operation_id,
                )

            result.append(feed_result)
    finally:
        if feed_data_repo:
            feed_data_repo.teardown()

    return result
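# Sketch of invoking the provider-based sync() above. The caller itself is
# hypothetical; any SyncUtilProvider implementation exposing to_sync,
# get_client(), sync_metadata(), and the other hooks used by sync() would fit,
# and FeedSyncResult is assumed to carry the .feed it was constructed with.
def example_run_provider_sync(provider: SyncUtilProvider, catalog_client: Optional[CatalogClient] = None) -> List[FeedSyncResult]:
    results = sync(sync_util_provider=provider, full_flush=False, catalog_client=catalog_client)
    for feed_result in results:
        if feed_result.status != "success":
            logger.warn("feed {} failed to sync".format(feed_result.feed))
    return results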
def update_counts():
    for feed in get_all_feeds_detached():
        f = feed_instance_by_name(feed.name)
        f.update_counts()