Example #1
    def sync_from_fetched(fetched_repo: LocalFeedDataRepo, catalog_client: CatalogClient = None, operation_id=None, full_flush=False):
        """
        Sync the data from a local fetched repo

        :param operation_id:
        :param catalog_client:
        :param fetched_repo:
        :return:
        """
        # Load the feed objects
        if not (fetched_repo.metadata and fetched_repo.metadata.download_result and fetched_repo.metadata.download_result.results):
            raise ValueError('Fetched repo has no download result records')
        else:
            feed_objs = [feed_instance_by_name(f) for f in set([x.feed for x in fetched_repo.metadata.download_result.results])]

        result = []

        for f in feed_objs:
            try:
                t = time.time()
                try:
                    log.info('Syncing downloaded feed data into database (operation_id={})'.format(operation_id))
                    # Do the sync from the local data
                    result.append(f.sync(fetched_data=fetched_repo, event_client=catalog_client, operation_id=operation_id, full_flush=full_flush))
                except Exception as e:
                    log.exception('Failure updating the {} feed from downloaded data (operation_id={})'.format(f.__feed_name__, operation_id))
                    fail_result = build_feed_sync_results(feed=f.__feed_name__)
                    fail_result['total_time_seconds'] = time.time() - t
                    result.append(fail_result)
            except Exception:
                log.exception('Error syncing feed {} (operation_id={})'.format(f.__feed_name__, operation_id))
                raise

        return result
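Every example on this page resolves a feed name to a feed implementation through feed_instance_by_name; callers either catch a KeyError for unknown names (Examples #2 and #10 below) or guard against a falsy return (Examples #4 through #6). A minimal sketch of the raising variant, assuming a hypothetical module-level registry keyed by the __feed_name__ attribute the examples read:

    _feed_registry = {}  # hypothetical registry: feed name -> feed class

    def register_feed(cls):
        # Hypothetical class decorator that populates the registry.
        _feed_registry[cls.__feed_name__] = cls
        return cls

    def feed_instance_by_name(name: str):
        # Unknown names raise KeyError, matching the behavior the callers rely on.
        return _feed_registry[name]()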
Example #2
 def update_counts():
     for feed in get_all_feeds_detached():
         try:
             f = feed_instance_by_name(feed.name)
             f.update_counts()
         except KeyError:
             logger.warning(
                 'Could not find feed instance for name %s. Cannot update counts',
                 feed.name)
Example #3
    def evaluate(self, image_obj, context):
        try:
            feed_meta = feed_instance_by_name('packages').group_by_name(FEED_KEY)
            if feed_meta and feed_meta.last_sync:
                return
        except Exception as e:
            log.exception('Error determining feed presence for gems. Defaulting to firing trigger')

        self._fire()
        return
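The check in Example #3 (skip when the feed group has already synced, fire the trigger on any doubt) could be factored into a reusable helper. A minimal sketch, assuming, as the example implies, that group_by_name returns a record carrying a last_sync timestamp; the helper name is hypothetical:

    import logging

    log = logging.getLogger(__name__)

    def feed_group_synced(feed_name: str, group_name: str) -> bool:
        # Hypothetical helper: treat any lookup or metadata error as "not synced",
        # so callers default to the conservative behavior (firing the trigger).
        try:
            group = feed_instance_by_name(feed_name).group_by_name(group_name)
            return bool(group and group.last_sync)
        except Exception:
            log.exception('Error determining sync state for %s/%s', feed_name, group_name)
            return False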
Example #4
    def delete_feed(feed_name):
        """

        :param feed_name:
        :return:
        """
        f = feed_instance_by_name(feed_name)
        if not f:
            raise KeyError(feed_name)

        return f.flush_all()
Example #5
    def delete_feed_group(feed_name, group_name):
        """

        :param feed_name:
        :param group_name:
        :return:
        """
        # TODO throw exception if feed is grypedb
        f = feed_instance_by_name(feed_name)
        if not f:
            raise KeyError(feed_name)
        return f.flush_group(group_name)
Example #6
    def delete_feed_group(feed_name, group_name):
        """

        :param feed_name:
        :param group_name:
        :return:
        """

        f = feed_instance_by_name(feed_name)
        if not f:
            raise KeyError(feed_name)

        return f.flush_group(group_name)
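Examples #4 through #6 raise KeyError for unknown feed names and leave handling to the caller. A hypothetical caller built on delete_feed_group above, translating the KeyError into a not-found result (the handler name and return convention are assumptions):

    import logging

    logger = logging.getLogger(__name__)

    def handle_delete_feed_group(feed_name: str, group_name: str):
        try:
            return delete_feed_group(feed_name, group_name)
        except KeyError:
            # Unknown feed: report it instead of letting the error propagate.
            logger.warning('No feed implementation registered for name %s', feed_name)
            return None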
Example #7
def test_group_lookups(test_data_env):
    r = DataFeeds.sync_metadata(feed_client=test_data_env.feed_client)
    assert r == empty_metadata_sync_result, 'No metadata should be returned from sync with empty to_sync input'

    r = DataFeeds.sync_metadata(feed_client=test_data_env.feed_client,
                                to_sync=['vulnerabilities'])
    assert r and len(
        r[0]
    ) == 1, 'Metadata should be returned from sync with non-empty to_sync list'

    df = feed_instance_by_name('vulnerabilities')
    assert df is not None, 'vulnerabilities feed instance not loaded'
    assert df.metadata, 'No vuln metadata found'
    logger.info('Vuln feed metadata {}'.format(df.metadata.to_json()))
    assert not df.group_by_name('not_a_real_Group'), 'Found non-existent group'
    assert df.group_by_name('alpine:3.6'), 'Should have found group alpine:3.6'
Example #8
def test_group_lookups(test_data_env):
    r = DataFeeds.sync_metadata(feed_client=test_data_env.feed_client)
    assert (
        r == empty_metadata_sync_result
    ), "No metadata should be returned from sync with empty to_sync input"

    r = DataFeeds.sync_metadata(feed_client=test_data_env.feed_client,
                                to_sync=["vulnerabilities"])
    assert (
        r and len(r[0]) == 1
    ), "Metadata should be returned from sync with non-empty to_sync list"

    df = feed_instance_by_name("vulnerabilities")
    assert df is not None, "vulnerabilities feed instance not loaded"
    assert df.metadata, "No vuln metadata found"
    logger.info("Vuln feed metadata {}".format(df.metadata.to_json()))
    assert not df.group_by_name("not_a_real_Group"), "Found non-existent group"
    assert df.group_by_name("alpine:3.6"), "Should have found group alpine:3.6"
Example #9
    def sync(to_sync=None,
             full_flush=False,
             catalog_client=None,
             feed_client=None,
             operation_id=None):
        """
        Sync all feeds.
        :return:
        """

        result = []

        if not feed_client:
            feed_client = get_client()

        logger.info('Performing sync of feeds: {} (operation_id={})'.format(
            'all' if to_sync is None else to_sync, operation_id))

        updated, failed = DataFeeds.sync_metadata(feed_client=feed_client,
                                                  to_sync=to_sync,
                                                  operation_id=operation_id)
        updated_names = set(updated.keys())

        # Feeds configured to sync but that were not on the upstream source at all
        for feed_name in set(to_sync or []).difference(updated_names):
            failed.append((feed_name, 'Feed not found on upstream source'))

        # Build the list of feed instances to execute the syncs on
        feeds_to_sync = []
        for feed_name in updated_names:
            try:
                feeds_to_sync.append(feed_instance_by_name(feed_name))
            except KeyError as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                failed.append((feed_name, e))
            except Exception as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                logger.warning(
                    'Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'
                    .format(feed_name, operation_id))
                failed.append((feed_name, e))

        # Process the feeds that failed for any reason pre-data-download
        result.extend(
            DataFeeds._process_failed_feeds(failed_tuples=failed,
                                            catalog_client=catalog_client,
                                            operation_id=operation_id))

        # Sort the feed instances for the syncing process to ensure highest priority feeds sync first (e.g. vulnerabilities before package metadata)
        feeds_to_sync = _ordered_feeds(feeds_to_sync)

        # Do the fetches
        groups_to_download = []
        for f in feeds_to_sync:
            logger.info(
                'Initialized feed to sync: {} (operation_id={})'.format(
                    f.__feed_name__, operation_id))
            if f.metadata:
                if f.metadata.enabled:
                    for g in f.metadata.groups:
                        if g.enabled:
                            groups_to_download.append(g)
                        else:
                            logger.info(
                                "Will not sync/download group {} of feed {} because group is explicitly disabled"
                                .format(g.name, g.feed_name))
                else:
                    logger.info(
                        'Skipping feed {} because it is explicitly not enabled'
                        .format(f.__feed_name__))
            else:
                logger.warning(
                    'No metadata found for feed {}. Unexpected but not an error (operation_id={})'
                    .format(f.__feed_name__, operation_id))

        logger.debug('Groups to download {}'.format(groups_to_download))

        base_dir = DataFeeds.__scratch_dir__ if DataFeeds.__scratch_dir__ else localconfig.get_config(
        ).get('tmp_dir')
        download_dir = os.path.join(base_dir, 'policy_engine_tmp',
                                    'feed_syncs')

        feed_data_repo = None
        try:
            # Order by feed
            for f in feeds_to_sync:
                feed_result = build_feed_sync_results(feed=f.__feed_name__,
                                                      status='failure')
                feed_result['status'] = 'success'

                try:
                    # Feed level notification and log msg
                    notify_event(FeedSyncStarted(feed=f.__feed_name__),
                                 catalog_client,
                                 operation_id=operation_id)

                    groups_to_sync = [
                        x for x in groups_to_download
                        if x.feed_name == f.__feed_name__
                    ]
                    logger.debug('Groups to sync {}'.format(groups_to_sync))

                    # Sync each group for this feed
                    for g in groups_to_sync:

                        # Download just one group into a download result
                        group_download_config = DownloadOperationConfiguration.generate_new(
                            feed_client.feed_url, db_groups_to_sync=[g])
                        downloader = FeedDownloader(
                            download_root_dir=download_dir,
                            config=group_download_config,
                            client=feed_client,
                            fetch_all=full_flush)

                        logger.debug('Groups to download {}'.format(
                            downloader.config.groups))
                        try:
                            notify_event(FeedGroupSyncStarted(feed=g.feed_name,
                                                              group=g.name),
                                         catalog_client,
                                         operation_id=operation_id)

                            logger.info(
                                'Beginning feed data fetch (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            feed_data_repo = downloader.execute(
                                feed_name=g.feed_name, group_name=g.name)

                            logger.info(
                                'Download complete. Syncing to db (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            f_result = DataFeeds.sync_from_fetched(
                                feed_data_repo,
                                catalog_client=catalog_client,
                                operation_id=operation_id,
                                full_flush=full_flush)

                            # Extract the single group record...
                            group_result = _get_group_result(f_result)

                            logger.info(
                                'DB Sync complete (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))

                            if group_result['status'] == 'success':
                                notify_event(FeedGroupSyncCompleted(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    result=group_result),
                                             catalog_client,
                                             operation_id=operation_id)
                            else:
                                # If any fails, the whole feed is marked as failed
                                feed_result['status'] = 'failure'
                                notify_event(FeedGroupSyncFailed(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    error='Failed to sync to db'),
                                             catalog_client,
                                             operation_id=operation_id)

                            feed_result['groups'].append(group_result)

                        except Exception as e:
                            logger.error(
                                'Error syncing {}/{} (operation_id={})'.format(
                                    g.feed_name, g.name, operation_id))
                            notify_event(
                                FeedGroupSyncFailed(feed=g.feed_name,
                                                    group=g.name,
                                                    error=e), catalog_client,
                                operation_id)
                            feed_result['status'] = 'failure'
                        finally:
                            try:
                                feed_data_repo.teardown()
                            except:
                                logger.exception(
                                    'Could not cleanup download repo due to error'
                                )

                            feed_data_repo = None

                except Exception:
                    logger.exception('Error syncing {} (operation_id={})'.format(
                        f, operation_id))

                if feed_result['status'] == 'success':
                    notify_event(FeedSyncCompleted(feed=f.__feed_name__),
                                 catalog_client, operation_id)
                else:
                    notify_event(
                        FeedSyncFailed(
                            feed=f.__feed_name__,
                            error='One or more groups failed to sync'),
                        catalog_client, operation_id)

                result.append(feed_result)
        finally:
            if feed_data_repo:
                feed_data_repo.teardown()

        return result
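Example #9 orders the feed instances with _ordered_feeds so that higher-priority feeds sync first (per the comment, vulnerabilities before package metadata). A minimal sketch of such an ordering; the priority list is illustrative, not the project's actual configuration:

    _FEED_PRIORITY = ['vulnerabilities', 'packages']  # hypothetical ordering

    def _ordered_feeds(feeds):
        # Feeds named earlier in the priority list sort first; unknown feeds last.
        def rank(feed):
            try:
                return _FEED_PRIORITY.index(feed.__feed_name__)
            except ValueError:
                return len(_FEED_PRIORITY)
        return sorted(feeds, key=rank)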
Example #10
 def records_for(feed_name, group_name):
     try:
         return feed_instance_by_name(feed_name).record_count(group_name)
     except KeyError as e:
         log.debug('cannot compute record count for unknown feed: {}'.format(e))
         return 0
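Building on records_for above, a feed-wide total could be computed by summing over the feed's groups, assuming the metadata.groups structure iterated in Example #9 (the aggregate helper is hypothetical):

    def total_records(feed_name: str) -> int:
        try:
            f = feed_instance_by_name(feed_name)
            return sum(records_for(feed_name, g.name) for g in f.metadata.groups)
        except (KeyError, AttributeError):
            # Unknown feed, or no metadata loaded yet: report zero records.
            return 0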
Example #11
    def sync(
        sync_util_provider: SyncUtilProvider,
        full_flush: bool = False,
        catalog_client: CatalogClient = None,
        operation_id: Optional[str] = None,
    ) -> List[FeedSyncResult]:
        """
        Sync all feeds.

        :param sync_util_provider: provider for sync utils (switches logic for legacy / grypedb feeds)
        :type sync_util_provider: SyncUtilProvider
        :param full_flush: whether or not to flush out the existing records before sync
        :type full_flush: bool
        :param catalog_client: catalog client
        :type catalog_client: CatalogClient
        :param operation_id: UUID4 hexadecimal string representing this operation
        :type operation_id: Optional[str]
        :return: list of FeedSyncResult
        :rtype: List[FeedSyncResult]
        """
        result = []
        to_sync = sync_util_provider.to_sync
        if not to_sync:
            return result
        feed_client = sync_util_provider.get_client()

        logger.info(
            "Performing sync of feeds: {} (operation_id={})".format(
                "all" if to_sync is None else to_sync, operation_id
            )
        )
        source_feeds = DataFeeds.get_feed_group_information(feed_client, to_sync)
        updated, failed = sync_util_provider.sync_metadata(source_feeds, operation_id)
        updated_names = set(updated.keys())

        # Feeds configured to sync but that were not on the upstream source at all
        for feed_name in set(to_sync).difference(updated_names):
            failed.append((feed_name, "Feed not found on upstream source"))

        # Build the list of feed instances to execute the syncs on
        feeds_to_sync = []
        for feed_name in updated_names:
            try:
                feeds_to_sync.append(feed_instance_by_name(feed_name))
            except KeyError as e:
                logger.error(
                    "Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})".format(
                        feed_name, str(e), operation_id
                    )
                )
                failed.append((feed_name, e))
            except Exception as e:
                logger.error(
                    "Could not initialize metadata for feed {}. Error: {}. (operation_id={})".format(
                        feed_name, str(e), operation_id
                    )
                )
                logger.warning(
                    "Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})".format(
                        feed_name, operation_id
                    )
                )
                failed.append((feed_name, e))

        # Process the feeds that failed for any reason pre-data-download
        result.extend(
            DataFeeds._process_failed_feeds(
                failed_tuples=failed,
                catalog_client=catalog_client,
                operation_id=operation_id,
            )
        )

        # Sort the feed instances for the syncing process to ensure highest priority feeds sync first (e.g. vulnerabilities before package metadata)
        feeds_to_sync = _ordered_feeds(feeds_to_sync)

        groups_to_download = sync_util_provider.get_groups_to_download(
            source_feeds, feeds_to_sync, operation_id
        )

        logger.debug("Groups to download {}".format(groups_to_download))

        base_dir = (
            DataFeeds.__scratch_dir__
            if DataFeeds.__scratch_dir__
            else localconfig.get_config().get("tmp_dir")
        )
        download_dir = os.path.join(base_dir, "policy_engine_tmp", "feed_syncs")

        feed_data_repo = None
        try:
            # Order by feed
            for f in feeds_to_sync:
                feed_result = FeedSyncResult(feed=f.__feed_name__, status="success")

                try:
                    # Feed level notification and log msg
                    notify_event(
                        FeedSyncStarted(feed=f.__feed_name__),
                        catalog_client,
                        operation_id=operation_id,
                    )

                    groups_to_sync = [
                        x for x in groups_to_download if x.feed_name == f.__feed_name__
                    ]
                    logger.debug("Groups to sync {}".format(groups_to_sync))

                    # Sync each group for this feed
                    for g in groups_to_sync:

                        # Download just one group into a download result
                        group_download_config = download_operation_config_factory(
                            feed_client.feed_url, db_groups_to_sync=[g]
                        )
                        downloader = FeedDownloader(
                            download_root_dir=download_dir,
                            config=group_download_config,
                            client=feed_client,
                            fetch_all=full_flush,
                        )

                        logger.debug(
                            "Groups to download {}".format(downloader.config.groups)
                        )
                        try:
                            notify_event(
                                FeedGroupSyncStarted(feed=g.feed_name, group=g.name),
                                catalog_client,
                                operation_id=operation_id,
                            )

                            logger.info(
                                "Beginning feed data fetch (feed={}, group={}, operation_id={})".format(
                                    g.feed_name, g.name, operation_id
                                )
                            )
                            feed_data_repo = downloader.execute(
                                feed_name=g.feed_name, group_name=g.name
                            )

                            logger.info(
                                "Download complete. Syncing to db (feed={}, group={}, operation_id={})".format(
                                    g.feed_name, g.name, operation_id
                                )
                            )
                            f_result = DataFeeds.sync_from_fetched(
                                feed_data_repo,
                                catalog_client=catalog_client,
                                operation_id=operation_id,
                                full_flush=full_flush,
                            )

                            # Extract the single group record...
                            group_result = sync_util_provider.retrieve_group_result(
                                f_result, g
                            )

                            logger.info(
                                "DB Sync complete (feed={}, group={}, operation_id={})".format(
                                    g.feed_name, g.name, operation_id
                                )
                            )

                            if group_result.status == "success":
                                notify_event(
                                    FeedGroupSyncCompleted(
                                        feed=f.__feed_name__,
                                        group=g.name,
                                        result=asdict(group_result),
                                    ),
                                    catalog_client,
                                    operation_id=operation_id,
                                )
                            else:
                                # If any fails, the whole feed is marked as failed
                                feed_result.status = "failure"
                                notify_event(
                                    FeedGroupSyncFailed(
                                        feed=f.__feed_name__,
                                        group=g.name,
                                        error="Failed to sync to db",
                                    ),
                                    catalog_client,
                                    operation_id=operation_id,
                                )

                            sync_util_provider.update_feed_result(
                                feed_result, f_result, group_result
                            )

                        except Exception as e:
                            logger.error(
                                "Error syncing {}/{} (operation_id={})".format(
                                    g.feed_name, g.name, operation_id
                                )
                            )
                            notify_event(
                                FeedGroupSyncFailed(
                                    feed=g.feed_name, group=g.name, error=e
                                ),
                                catalog_client,
                                operation_id,
                            )
                            feed_result.status = "failure"
                        finally:
                            try:
                                feed_data_repo.teardown()
                            except Exception:
                                logger.exception(
                                    "Could not cleanup download repo due to error"
                                )

                            feed_data_repo = None

                except Exception:
                    logger.exception(
                        "Error syncing {} (operation_id={})".format(f, operation_id)
                    )

                if feed_result.status == "success":
                    notify_event(
                        FeedSyncCompleted(feed=f.__feed_name__),
                        catalog_client,
                        operation_id,
                    )
                else:
                    notify_event(
                        FeedSyncFailed(
                            feed=f.__feed_name__,
                            error="One or more groups failed to sync",
                        ),
                        catalog_client,
                        operation_id,
                    )

                result.append(feed_result)
        finally:
            if feed_data_repo:
                feed_data_repo.teardown()

        return result
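Example #11 pushes the legacy-versus-grypedb differences behind a SyncUtilProvider. The interface below is inferred solely from the calls sync() makes above; the signatures are assumptions, not the project's actual definitions:

    from abc import ABC, abstractmethod

    class SyncUtilProvider(ABC):
        # Inferred interface sketch: only what Example #11 actually calls.

        @property
        @abstractmethod
        def to_sync(self):
            """Names of the feeds this provider is configured to sync."""

        @abstractmethod
        def get_client(self):
            """Return the feed service client used for downloads."""

        @abstractmethod
        def sync_metadata(self, source_feeds, operation_id):
            """Return (updated: dict, failed: list of (feed_name, error))."""

        @abstractmethod
        def get_groups_to_download(self, source_feeds, feeds_to_sync, operation_id):
            """Return the group records to fetch for the given feeds."""

        @abstractmethod
        def retrieve_group_result(self, f_result, group):
            """Extract a single group's result from a sync_from_fetched() result."""

        @abstractmethod
        def update_feed_result(self, feed_result, f_result, group_result):
            """Fold a group result into the running FeedSyncResult."""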
Example #12
 def update_counts():
     for feed in get_all_feeds_detached():
         f = feed_instance_by_name(feed.name)
         f.update_counts()