Example no. 1
0
    def _process_failed_feeds(failed_tuples,
                              catalog_client,
                              operation_id=None) -> list:
        """
        Emit start/failure events and build failure result records for feeds
        that could not be synced.

        :param failed_tuples: iterable of tuples of form (str, exception or str) where element 0 is feed name and element 1 is the failure error
        :param catalog_client: client used to emit notification events
        :param operation_id: optional id correlating the events of one sync operation
        :return: list of feed sync result dicts, one per failed feed, each with status 'failure'
        """
        fail_results = []
        for name, error in failed_tuples:
            try:
                # Emit the events for a start/stop that failed since without metadata sync we cannot sync the feed reliably
                notify_event(FeedSyncStarted(feed=name),
                             catalog_client,
                             operation_id=operation_id)
                notify_event(FeedSyncFailed(feed=name, error=error),
                             catalog_client,
                             operation_id=operation_id)
            except Exception:
                # Event emission is best-effort: log and continue so the failure
                # record below is still produced. Narrowed from a bare except so
                # SystemExit/KeyboardInterrupt are not swallowed here.
                logger.exception('Error emitting feed sync failure events')
            finally:
                feed_result = build_feed_sync_results(feed=name,
                                                      status='failure')
                fail_results.append(feed_result)

        return fail_results
Example no. 2
0
    def sync_from_fetched(fetched_repo: LocalFeedDataRepo, catalog_client: CatalogClient = None, operation_id=None, full_flush=False):
        """
        Sync the data from a local fetched repo into the database.

        :param fetched_repo: repo holding previously downloaded feed data and its metadata
        :param catalog_client: client used to emit notification events during the sync
        :param operation_id: optional id correlating log messages/events of one sync operation
        :param full_flush: if True, flush existing feed data as part of the sync
        :return: list of feed sync result dicts, one per distinct feed found in the repo
        :raises ValueError: if the repo metadata has no download result records
        """
        # Load the feed objects, one per distinct feed name present in the download results
        if not (fetched_repo.metadata and fetched_repo.metadata.download_result and fetched_repo.metadata.download_result.results):
            raise ValueError('Fetched repo has no download result records')

        feed_objs = [feed_instance_by_name(f) for f in set(x.feed for x in fetched_repo.metadata.download_result.results)]

        result = []

        for f in feed_objs:
            try:
                t = time.time()
                try:
                    log.info('Syncing downloaded feed data into database (operation_id={})'.format(operation_id))
                    # Do the sync from the local data
                    result.append(f.sync(fetched_data=fetched_repo, event_client=catalog_client, operation_id=operation_id, full_flush=full_flush))
                except Exception:
                    log.exception('Failure updating the {} feed from downloaded data (operation_id={})'.format(f.__feed_name__, operation_id))
                    # Explicit failure status for consistency with the other result builders
                    fail_result = build_feed_sync_results(feed=f.__feed_name__, status='failure')
                    fail_result['total_time_seconds'] = time.time() - t
                    result.append(fail_result)
            except Exception:
                # An error in the failure-handling path itself is fatal for this sync.
                # Narrowed from a bare except; the exception is still re-raised.
                log.exception('Error syncing feed {} (operation_id={})'.format(f.__feed_name__, operation_id))
                raise

        return result
Example no. 3
0
    def sync(to_sync=None,
             full_flush=False,
             catalog_client=None,
             feed_client=None,
             operation_id=None):
        """
        Sync all feeds, or only the feeds named in to_sync.

        :param to_sync: optional iterable of feed names to sync; None means sync all feeds
        :param full_flush: if True, fetch everything and flush existing data during sync
        :param catalog_client: client used to emit notification events
        :param feed_client: upstream feed service client; constructed via get_client() if not given
        :param operation_id: optional id correlating log messages/events of one sync operation
        :return: list of feed sync result dicts, one per feed processed (including failures)
        """

        result = []

        if not feed_client:
            feed_client = get_client()

        logger.info('Performing sync of feeds: {} (operation_id={})'.format(
            'all' if to_sync is None else to_sync, operation_id))

        updated, failed = DataFeeds.sync_metadata(feed_client=feed_client,
                                                  to_sync=to_sync,
                                                  operation_id=operation_id)
        updated_names = set(updated.keys())

        # Feeds configured to sync but that were not on the upstream source at all.
        # Guard against to_sync=None (meaning 'all'), which set() cannot consume.
        for feed_name in set(to_sync or []).difference(updated_names):
            failed.append((feed_name, 'Feed not found on upstream source'))

        # Build the list of feed instances to execute the syncs on
        feeds_to_sync = []
        for feed_name in updated_names:
            try:
                feeds_to_sync.append(feed_instance_by_name(feed_name))
            except KeyError as e:
                # No implementation registered for this feed name
                logger.error(
                    'Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                failed.append((feed_name, e))
            except Exception as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                logger.warn(
                    'Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'
                    .format(feed_name, operation_id))
                failed.append((feed_name, e))

        # Process the feeds that failed for any reason pre-data-download
        result.extend(
            DataFeeds._process_failed_feeds(failed_tuples=failed,
                                            catalog_client=catalog_client,
                                            operation_id=operation_id))

        # Sort the feed instances for the syncing process to ensure highest priority feeds sync first (e.g. vulnerabilities before package metadatas)
        feeds_to_sync = _ordered_feeds(feeds_to_sync)

        # Collect the enabled groups of enabled feeds for download
        groups_to_download = []
        for f in feeds_to_sync:
            logger.info(
                'Initialized feed to sync: {} (operation_id={})'.format(
                    f.__feed_name__, operation_id))
            if f.metadata:
                if f.metadata.enabled:
                    for g in f.metadata.groups:
                        if g.enabled:
                            groups_to_download.append(g)
                        else:
                            logger.info(
                                "Will not sync/download group {} of feed {} because group is explicitly disabled"
                                .format(g.name, g.feed_name))
                else:
                    logger.info(
                        'Skipping feed {} because it is explicitly not enabled'
                        .format(f.__feed_name__))
            else:
                logger.warn(
                    'No metadata found for feed {}. Unexpected but not an error (operation_id={})'
                    .format(f.__feed_name__, operation_id))

        logger.debug('Groups to download {}'.format(groups_to_download))

        # NOTE: feed_client is already guaranteed non-None by the check at the
        # top of this function, so no re-initialization is needed here.

        base_dir = DataFeeds.__scratch_dir__ if DataFeeds.__scratch_dir__ else localconfig.get_config(
        ).get('tmp_dir')
        download_dir = os.path.join(base_dir, 'policy_engine_tmp',
                                    'feed_syncs')

        feed_data_repo = None
        try:
            # Order by feed
            for f in feeds_to_sync:
                # Start optimistic: any group failure flips this to 'failure'
                feed_result = build_feed_sync_results(feed=f.__feed_name__,
                                                      status='failure')
                feed_result['status'] = 'success'

                try:
                    # Feed level notification and log msg
                    notify_event(FeedSyncStarted(feed=f.__feed_name__),
                                 catalog_client,
                                 operation_id=operation_id)

                    groups_to_sync = [
                        x for x in groups_to_download
                        if x.feed_name == f.__feed_name__
                    ]
                    logger.debug('Groups to sync {}'.format(groups_to_sync))

                    # Filter groups by that feed
                    for g in groups_to_sync:

                        # Download just one group into a download result
                        group_download_config = DownloadOperationConfiguration.generate_new(
                            feed_client.feed_url, db_groups_to_sync=[g])
                        downloader = FeedDownloader(
                            download_root_dir=download_dir,
                            config=group_download_config,
                            client=feed_client,
                            fetch_all=full_flush)

                        logger.debug('Groups to download {}'.format(
                            downloader.config.groups))
                        try:
                            notify_event(FeedGroupSyncStarted(feed=g.feed_name,
                                                              group=g.name),
                                         catalog_client,
                                         operation_id=operation_id)

                            logger.info(
                                'Beginning feed data fetch (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            feed_data_repo = downloader.execute(
                                feed_name=g.feed_name, group_name=g.name)

                            logger.info(
                                'Download complete. Syncing to db (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            f_result = DataFeeds.sync_from_fetched(
                                feed_data_repo,
                                catalog_client=catalog_client,
                                operation_id=operation_id,
                                full_flush=full_flush)

                            # Extract the single group record...
                            group_result = _get_group_result(f_result)

                            logger.info(
                                'DB Sync complete (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))

                            if group_result['status'] == 'success':
                                notify_event(FeedGroupSyncCompleted(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    result=group_result),
                                             catalog_client,
                                             operation_id=operation_id)
                            else:
                                # If any fails, the whole feed is marked as failed
                                feed_result['status'] = 'failure'
                                notify_event(FeedGroupSyncFailed(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    error='Failed to sync to db'),
                                             catalog_client,
                                             operation_id=operation_id)

                            feed_result['groups'].append(group_result)

                        except Exception as e:
                            logger.error(
                                'Error syncing {}/{} (operation_id={})'.format(
                                    g.feed_name, g.name, operation_id))
                            notify_event(
                                FeedGroupSyncFailed(feed=g.feed_name,
                                                    group=g.name,
                                                    error=e), catalog_client,
                                operation_id)
                            feed_result['status'] = 'failure'
                        finally:
                            # Always try to clean up the per-group download dir
                            try:
                                feed_data_repo.teardown()
                            except Exception:
                                logger.exception(
                                    'Could not cleanup download repo due to error'
                                )

                            feed_data_repo = None

                except Exception:
                    # A feed-level error; the feed_result built above still records the outcome
                    logger.error('Error syncing {} (operation_id={})'.format(
                        f, operation_id))

                if feed_result['status'] == 'success':
                    notify_event(FeedSyncCompleted(feed=f.__feed_name__),
                                 catalog_client, operation_id)
                else:
                    notify_event(
                        FeedSyncFailed(
                            feed=f.__feed_name__,
                            error='One or more groups failed to sync'),
                        catalog_client, operation_id)

                result.append(feed_result)
        finally:
            # Safety net: tear down any repo left over from an aborted iteration
            if feed_data_repo:
                feed_data_repo.teardown()

        return result