def _process_failed_feeds(failed_tuples, catalog_client, operation_id=None) -> list:
    """
    Emit failure events and build failure results for feeds that could not be synced.

    :param failed_tuples: iterable of tuples of form (str, exception or str) where element 0 is the feed name and element 1 is the failure error
    :param catalog_client: catalog client used to emit the sync events
    :param operation_id: optional identifier for correlating events and log messages
    :return: list of feed sync result dicts, each with status 'failure'
    """
    fail_results = []

    for name, error in failed_tuples:
        try:
            # Emit the start and failure events for the feed, since without a metadata sync we cannot sync the feed reliably
            notify_event(FeedSyncStarted(feed=name), catalog_client, operation_id=operation_id)
            notify_event(FeedSyncFailed(feed=name, error=error), catalog_client, operation_id=operation_id)
        except Exception:
            logger.exception('Error emitting feed sync failure events')
        finally:
            feed_result = build_feed_sync_results(feed=name, status='failure')
            fail_results.append(feed_result)

    return fail_results
def sync(to_sync=None, full_flush=False, catalog_client=None, feed_client=None, operation_id=None):
    """
    Sync all feeds, or only the feeds named in to_sync.

    :param to_sync: optional list of feed names to sync; syncs all feeds if None
    :param full_flush: if True, fetch all data rather than performing an incremental update
    :param catalog_client: catalog client used to emit sync events
    :param feed_client: optional feed service client; one is constructed if not provided
    :param operation_id: optional identifier for correlating events and log messages
    :return: list of feed sync result dicts, one per feed
    """
    result = []

    if not feed_client:
        feed_client = get_client()

    logger.info('Performing sync of feeds: {} (operation_id={})'.format('all' if to_sync is None else to_sync, operation_id))

    updated, failed = DataFeeds.sync_metadata(feed_client=feed_client, to_sync=to_sync, operation_id=operation_id)
    updated_names = set(updated.keys())

    # Feeds configured to sync but that were not on the upstream source at all
    for feed_name in set(to_sync or []).difference(updated_names):
        failed.append((feed_name, 'Feed not found on upstream source'))

    # Build the list of feed instances to execute the syncs on
    feeds_to_sync = []
    for feed_name in updated_names:
        try:
            feeds_to_sync.append(feed_instance_by_name(feed_name))
        except KeyError as e:
            logger.error('Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'.format(feed_name, str(e), operation_id))
            failed.append((feed_name, e))
        except Exception as e:
            logger.error('Could not initialize metadata for feed {}. Error: {}. (operation_id={})'.format(feed_name, str(e), operation_id))
            logger.warn('Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'.format(feed_name, operation_id))
            failed.append((feed_name, e))

    # Process the feeds that failed for any reason before data download
    result.extend(DataFeeds._process_failed_feeds(failed_tuples=failed, catalog_client=catalog_client, operation_id=operation_id))

    # Sort the feed instances so the highest priority feeds sync first (e.g. vulnerabilities before package metadata)
    feeds_to_sync = _ordered_feeds(feeds_to_sync)

    # Do the fetches
    groups_to_download = []
    for f in feeds_to_sync:
        logger.info('Initialized feed to sync: {} (operation_id={})'.format(f.__feed_name__, operation_id))
        if f.metadata:
            if f.metadata.enabled:
                for g in f.metadata.groups:
                    if g.enabled:
                        groups_to_download.append(g)
                    else:
                        logger.info('Will not sync/download group {} of feed {} because group is explicitly disabled'.format(g.name, g.feed_name))
            else:
                logger.info('Skipping feed {} because it is explicitly not enabled'.format(f.__feed_name__))
        else:
            logger.warn('No metadata found for feed {}. Unexpected but not an error (operation_id={})'.format(f.__feed_name__, operation_id))

    logger.debug('Groups to download {}'.format(groups_to_download))

    if not feed_client:
        feed_client = get_client()

    base_dir = DataFeeds.__scratch_dir__ if DataFeeds.__scratch_dir__ else localconfig.get_config().get('tmp_dir')
    download_dir = os.path.join(base_dir, 'policy_engine_tmp', 'feed_syncs')
    feed_data_repo = None

    try:
        # Order by feed
        for f in feeds_to_sync:
            feed_result = build_feed_sync_results(feed=f.__feed_name__, status='failure')
            feed_result['status'] = 'success'

            try:
                # Feed level notification and log msg
                notify_event(FeedSyncStarted(feed=f.__feed_name__), catalog_client, operation_id=operation_id)

                # Filter the download groups to those belonging to this feed
                groups_to_sync = [x for x in groups_to_download if x.feed_name == f.__feed_name__]
                logger.debug('Groups to sync {}'.format(groups_to_sync))

                for g in groups_to_sync:
                    # Download just one group into a download result
                    group_download_config = DownloadOperationConfiguration.generate_new(feed_client.feed_url, db_groups_to_sync=[g])
                    downloader = FeedDownloader(download_root_dir=download_dir, config=group_download_config, client=feed_client, fetch_all=full_flush)
                    logger.debug('Groups to download {}'.format(downloader.config.groups))

                    try:
                        notify_event(FeedGroupSyncStarted(feed=g.feed_name, group=g.name), catalog_client, operation_id=operation_id)

                        logger.info('Beginning feed data fetch (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))
                        feed_data_repo = downloader.execute(feed_name=g.feed_name, group_name=g.name)

                        logger.info('Download complete. Syncing to db (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))
                        f_result = DataFeeds.sync_from_fetched(feed_data_repo, catalog_client=catalog_client, operation_id=operation_id, full_flush=full_flush)

                        # Extract the single group record...
                        group_result = _get_group_result(f_result)

                        logger.info('DB Sync complete (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))

                        if group_result['status'] == 'success':
                            notify_event(FeedGroupSyncCompleted(feed=f.__feed_name__, group=g.name, result=group_result), catalog_client, operation_id=operation_id)
                        else:
                            # If any group fails, the whole feed is marked as failed
                            feed_result['status'] = 'failure'
                            notify_event(FeedGroupSyncFailed(feed=f.__feed_name__, group=g.name, error='Failed to sync to db'), catalog_client, operation_id=operation_id)

                        feed_result['groups'].append(group_result)
                    except Exception as e:
                        logger.error('Error syncing {}/{} (operation_id={})'.format(g.feed_name, g.name, operation_id))
                        notify_event(FeedGroupSyncFailed(feed=g.feed_name, group=g.name, error=e), catalog_client, operation_id)
                        feed_result['status'] = 'failure'
                    finally:
                        try:
                            if feed_data_repo:
                                feed_data_repo.teardown()
                        except Exception:
                            logger.exception('Could not cleanup download repo due to error')

                        feed_data_repo = None

            except Exception:
                logger.error('Error syncing {} (operation_id={})'.format(f, operation_id))

            if feed_result['status'] == 'success':
                notify_event(FeedSyncCompleted(feed=f.__feed_name__), catalog_client, operation_id)
            else:
                notify_event(FeedSyncFailed(feed=f.__feed_name__, error='One or more groups failed to sync'), catalog_client, operation_id)

            result.append(feed_result)
    finally:
        if feed_data_repo:
            feed_data_repo.teardown()

    return result
def execute(self):
    logger.info('Starting feed sync. (operation_id={})'.format(self.uuid))

    # Feed syncs will update the images with any new cves that are pulled in for the sync. As such, any images that are loaded while
    # the sync itself is in progress need to be re-scanned for cves, since the transaction ordering can result in the images being
    # loaded with data prior to the sync but not included in the sync process itself.

    # Create feed task begin event
    error = None
    with session_scope() as session:
        mgr = identities.manager_factory.for_session(session)
        catalog_client = internal_client_for(CatalogClient, userId=None)

    try:
        notify_event(FeedSyncTaskStarted(groups=self.feeds if self.feeds else 'all'), catalog_client, self.uuid)
    except Exception:
        logger.exception('Ignoring event generation error before feed sync. (operation_id={})'.format(self.uuid))

    start_time = datetime.datetime.utcnow()
    try:
        updated_dict = DataFeeds.sync(to_sync=self.feeds, full_flush=self.full_flush, catalog_client=catalog_client, operation_id=self.uuid)
        logger.info('Feed sync complete (operation_id={})'.format(self.uuid))
        return updated_dict
    except Exception as e:
        error = e
        logger.exception('Failure refreshing and syncing feeds. (operation_id={})'.format(self.uuid))
        raise
    finally:
        end_time = datetime.datetime.utcnow()

        # Log feed sync event
        try:
            if error:
                notify_event(FeedSyncTaskFailed(groups=self.feeds if self.feeds else 'all', error=error), catalog_client, self.uuid)
            else:
                notify_event(FeedSyncTaskCompleted(groups=self.feeds if self.feeds else 'all'), catalog_client, self.uuid)
        except Exception:
            logger.exception('Ignoring event generation error after feed sync (operation_id={})'.format(self.uuid))

        try:
            self.rescan_images_created_between(from_time=start_time, to_time=end_time)
        except Exception:
            logger.exception('Unexpected exception rescanning vulns for images added during the feed sync. (operation_id={})'.format(self.uuid))
            raise
        finally:
            end_session()