Example #1
    def _process_failed_feeds(failed_tuples,
                              catalog_client,
                              operation_id=None) -> list:
        """
        :param failed_tuples: iterable of tuples of form (str, exception or str) where element 0 is feed name and element 1 is the failure error
        :param catalog_client:
        :param operation_id:
        :return:
        """
        fail_results = []
        for name, error in failed_tuples:
            try:
                # Emit start/failed events for this feed, since without a metadata sync the feed cannot be synced reliably
                notify_event(FeedSyncStarted(feed=name),
                             catalog_client,
                             operation_id=operation_id)
                notify_event(FeedSyncFailed(feed=name, error=error),
                             catalog_client,
                             operation_id=operation_id)
            except Exception:
                logger.exception('Error emitting feed sync failure events')
            finally:
                feed_result = build_feed_sync_results(feed=name,
                                                      status='failure')
                fail_results.append(feed_result)

        return fail_results
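
For reference, a minimal, self-contained sketch of the pattern above: notifications are emitted best-effort per failure, but a failure result is always recorded. The stub notify_event and the result-dict shape here are assumptions for illustration, not the real anchore-engine implementations.

    import logging

    logger = logging.getLogger(__name__)

    def notify_event(event, client, operation_id=None):
        # Stand-in emitter; the real one posts events to the catalog service
        logger.info('event=%r operation_id=%s', event, operation_id)

    def process_failures(failed_tuples, client, operation_id=None):
        results = []
        for name, error in failed_tuples:
            try:
                notify_event(('sync_failed', name, str(error)), client, operation_id)
            except Exception:
                # Notification is best-effort; the failure result is still recorded
                logger.exception('Error emitting feed sync failure events')
            finally:
                results.append({'feed': name, 'status': 'failure', 'groups': []})
        return results

    print(process_failures([('vulnerabilities', 'fetch timed out')], client=None))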
Example #2
    def sync(to_sync=None,
             full_flush=False,
             catalog_client=None,
             feed_client=None,
             operation_id=None):
        """
        Sync all feeds.
        :return:
        """

        result = []

        if not feed_client:
            feed_client = get_client()

        logger.info('Performing sync of feeds: {} (operation_id={})'.format(
            'all' if to_sync is None else to_sync, operation_id))

        updated, failed = DataFeeds.sync_metadata(feed_client=feed_client,
                                                  to_sync=to_sync,
                                                  operation_id=operation_id)
        updated_names = set(updated.keys())

        # Feeds configured to sync but not present on the upstream source at all
        for feed_name in set(to_sync or []).difference(updated_names):
            failed.append((feed_name, 'Feed not found on upstream source'))

        # Build the list of feed instances to execute the syncs on
        feeds_to_sync = []
        for feed_name in updated_names:
            try:
                feeds_to_sync.append(feed_instance_by_name(feed_name))
            except KeyError as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                failed.append((feed_name, e))
            except Exception as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                logger.warning(
                    'Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'
                    .format(feed_name, operation_id))
                failed.append((feed_name, e))

        # Process the feeds that failed for any reason pre-data-download
        result.extend(
            DataFeeds._process_failed_feeds(failed_tuples=failed,
                                            catalog_client=catalog_client,
                                            operation_id=operation_id))

        # Sort the feed instances so the highest priority feeds sync first (e.g. vulnerabilities before package metadata)
        feeds_to_sync = _ordered_feeds(feeds_to_sync)

        # Do the fetches
        groups_to_download = []
        for f in feeds_to_sync:
            logger.info(
                'Initialized feed to sync: {} (operation_id={})'.format(
                    f.__feed_name__, operation_id))
            if f.metadata:
                if f.metadata.enabled:
                    for g in f.metadata.groups:
                        if g.enabled:
                            groups_to_download.append(g)
                        else:
                            logger.info(
                                "Will not sync/download group {} of feed {} because group is explicitly disabled"
                                .format(g.name, g.feed_name))
                else:
                    logger.info(
                        'Skipping feed {} because it is explicitly not enabled'
                        .format(f.__feed_name__))
            else:
                logger.warning(
                    'No metadata found for feed {}. Unexpected but not an error (operation_id={})'
                    .format(f.__feed_name__, operation_id))

        logger.debug('Groups to download {}'.format(groups_to_download))

        base_dir = DataFeeds.__scratch_dir__ or localconfig.get_config().get('tmp_dir')
        download_dir = os.path.join(base_dir, 'policy_engine_tmp', 'feed_syncs')

        feed_data_repo = None
        try:
            # Order by feed
            for f in feeds_to_sync:
                # Default the feed result to success; any group failure below flips it back to failure
                feed_result = build_feed_sync_results(feed=f.__feed_name__,
                                                      status='failure')
                feed_result['status'] = 'success'

                try:
                    # Feed level notification and log msg
                    notify_event(FeedSyncStarted(feed=f.__feed_name__),
                                 catalog_client,
                                 operation_id=operation_id)

                    groups_to_sync = [
                        x for x in groups_to_download
                        if x.feed_name == f.__feed_name__
                    ]
                    logger.debug('Groups to sync {}'.format(groups_to_sync))

                    # Filter groups by that feed
                    for g in groups_to_sync:

                        # Download just one group into a download result
                        group_download_config = DownloadOperationConfiguration.generate_new(
                            feed_client.feed_url, db_groups_to_sync=[g])
                        downloader = FeedDownloader(
                            download_root_dir=download_dir,
                            config=group_download_config,
                            client=feed_client,
                            fetch_all=full_flush)

                        logger.debug('Groups to download {}'.format(
                            downloader.config.groups))
                        try:
                            notify_event(FeedGroupSyncStarted(feed=g.feed_name,
                                                              group=g.name),
                                         catalog_client,
                                         operation_id=operation_id)

                            logger.info(
                                'Beginning feed data fetch (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            feed_data_repo = downloader.execute(
                                feed_name=g.feed_name, group_name=g.name)

                            logger.info(
                                'Download complete. Syncing to db (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            f_result = DataFeeds.sync_from_fetched(
                                feed_data_repo,
                                catalog_client=catalog_client,
                                operation_id=operation_id,
                                full_flush=full_flush)

                            # Extract the single group record...
                            group_result = _get_group_result(f_result)

                            logger.info(
                                'DB Sync complete (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))

                            if group_result['status'] == 'success':
                                notify_event(FeedGroupSyncCompleted(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    result=group_result),
                                             catalog_client,
                                             operation_id=operation_id)
                            else:
                                # If any fails, the whole feed is marked as failed
                                feed_result['status'] = 'failure'
                                notify_event(FeedGroupSyncFailed(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    error='Failed to sync to db'),
                                             catalog_client,
                                             operation_id=operation_id)

                            feed_result['groups'].append(group_result)

                        except Exception as e:
                            logger.exception(
                                'Error syncing {}/{} (operation_id={})'.format(
                                    g.feed_name, g.name, operation_id))
                            notify_event(
                                FeedGroupSyncFailed(feed=g.feed_name,
                                                    group=g.name,
                                                    error=e), catalog_client,
                                operation_id)
                            feed_result['status'] = 'failure'
                        finally:
                            # Guard against teardown of a repo that was never created (e.g. the download itself failed)
                            if feed_data_repo:
                                try:
                                    feed_data_repo.teardown()
                                except Exception:
                                    logger.exception(
                                        'Could not cleanup download repo due to error'
                                    )
                            feed_data_repo = None

                except Exception as e:
                    logger.exception('Error syncing feed {} (operation_id={})'.format(
                        f.__feed_name__, operation_id))
                    # A feed-level error means this feed's sync did not complete
                    feed_result['status'] = 'failure'

                if feed_result['status'] == 'success':
                    notify_event(FeedSyncCompleted(feed=f.__feed_name__),
                                 catalog_client, operation_id)
                else:
                    notify_event(
                        FeedSyncFailed(
                            feed=f.__feed_name__,
                            error='One or more groups failed to sync'),
                        catalog_client, operation_id)

                result.append(feed_result)
        finally:
            if feed_data_repo:
                feed_data_repo.teardown()

        return result
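
The per-feed status aggregation in sync follows a simple rule: a feed succeeds only if every one of its groups succeeds. A self-contained sketch of that rule, with dict shapes mirroring the result structures above (the helper name is illustrative, not part of the real API):

    def aggregate_feed_status(feed_name, group_results):
        # Default the feed to success; any failed group flips it to failure
        feed_result = {'feed': feed_name, 'status': 'success', 'groups': []}
        for group_result in group_results:
            feed_result['groups'].append(group_result)
            if group_result['status'] != 'success':
                feed_result['status'] = 'failure'
        return feed_result

    print(aggregate_feed_status('vulnerabilities',
                                [{'status': 'success'}, {'status': 'failure'}]))
    # -> status is 'failure' because one group failed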
Example #3
    def execute(self):
        logger.info('Starting feed sync. (operation_id={})'.format(self.uuid))

        # Feed syncs will update images with any new CVEs pulled in by the sync. Any images loaded while the sync is in
        # progress must be re-scanned for CVEs, since transaction ordering can result in images being loaded with
        # pre-sync data but not included in the sync process itself.

        # Create feed task begin event
        error = None
        with session_scope() as session:
            catalog_client = internal_client_for(CatalogClient, userId=None)

        try:
            notify_event(
                FeedSyncTaskStarted(
                    groups=self.feeds if self.feeds else 'all'),
                catalog_client, self.uuid)
        except Exception:
            logger.exception(
                'Ignoring event generation error before feed sync. (operation_id={})'
                .format(self.uuid))

        start_time = datetime.datetime.utcnow()
        try:
            updated_dict = DataFeeds.sync(to_sync=self.feeds,
                                          full_flush=self.full_flush,
                                          catalog_client=catalog_client,
                                          operation_id=self.uuid)

            logger.info('Feed sync complete (operation_id={})'.format(
                self.uuid))
            return updated_dict
        except Exception as e:
            error = e
            logger.exception(
                'Failure refreshing and syncing feeds. (operation_id={})'.
                format(self.uuid))
            raise
        finally:
            end_time = datetime.datetime.utcnow()
            # log feed sync event
            try:
                if error:
                    notify_event(
                        FeedSyncTaskFailed(
                            groups=self.feeds if self.feeds else 'all',
                            error=error), catalog_client, self.uuid)
                else:
                    notify_event(
                        FeedSyncTaskCompleted(
                            groups=self.feeds if self.feeds else 'all'),
                        catalog_client, self.uuid)
            except Exception:
                logger.exception(
                    'Ignoring event generation error after feed sync (operation_id={})'
                    .format(self.uuid))

            try:
                self.rescan_images_created_between(from_time=start_time,
                                                   to_time=end_time)
            except Exception:
                logger.exception(
                    'Unexpected exception rescanning vulns for images added during the feed sync. (operation_id={})'
                    .format(self.uuid))
                raise
            finally:
                end_session()
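
The task brackets the sync in a [start_time, end_time] window and re-scans any images created inside it, because those images may have been matched against pre-sync vulnerability data. A self-contained sketch of that windowing logic (the image records and the helper name are illustrative only):

    import datetime

    def images_needing_rescan(images, start_time, end_time):
        # Images created while the sync ran may have been scanned against
        # pre-sync feed data, so they must be re-scanned
        return [i for i in images if start_time <= i['created_at'] <= end_time]

    start = datetime.datetime(2021, 1, 1, 12, 0, 0)
    end = datetime.datetime(2021, 1, 1, 12, 30, 0)
    images = [
        {'id': 'img-1', 'created_at': datetime.datetime(2021, 1, 1, 11, 0, 0)},
        {'id': 'img-2', 'created_at': datetime.datetime(2021, 1, 1, 12, 10, 0)},
    ]
    print(images_needing_rescan(images, start, end))  # only img-2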