Example #1
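Verifies that a feeds client can be constructed with the configured credentials, retrying with a linearly increasing backoff (feed_config_check_backoff seconds per attempt, up to feed_config_check_retries attempts) and re-raising the last exception if every attempt fails.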
def _check_feed_client_credentials():
    from anchore_engine.services.policy_engine.engine.feeds.client import get_client
    sleep_time = feed_config_check_backoff
    last_ex = None

    for i in range(feed_config_check_retries):
        if i > 0:
            logger.info("Waiting for {} seconds to try feeds client config check again".format(sleep_time))
            time.sleep(sleep_time)
            sleep_time += feed_config_check_backoff

        try:
            logger.info('Checking feeds client credentials. Attempt {} of {}'.format(i + 1, feed_config_check_retries))
            client = get_client()
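            # Successfully constructing the client is the check itself; the reference is dropped right away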
            client = None
            logger.info('Feeds client credentials ok')
            return True
        except Exception as e:
            logger.warn("Could not verify feeds endpoint and/or config. Got exception: {}".format(e))
            last_ex = e
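    # for/else: reached only when every retry attempt failed without returning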
    else:
        if last_ex:
            raise last_ex
        else:
            raise Exception('Exceeded retries for feeds client config check. Failing check')
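
# Pages through the vulnerabilities/alpine:3.6 group, following next_token until the service
# reports no more pages, and asserts that each page is non-empty and returns a new token.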
def test_feed_sync():
    test_client = get_client(feeds_url=feed_url,
                             user=('*****@*****.**', 'pbiU2RYZ2XrmYQ'),
                             conn_timeout=10,
                             read_timeout=30)
    for f in test_client.list_feeds().feeds:
        try:
            test_client.list_feed_groups(f.name)
        except Exception as e:
            logger.info('Caught: {} for feed: {}'.format(e, f))


    next_token = False
    since_time = None
    feed = 'vulnerabilities'
    group = 'alpine:3.6'
    last_token = None

    while next_token is not None:
        logger.info('Getting a page of data')
        if next_token:
            last_token = next_token
            logger.info('Using token: {}'.format(next_token))
            data = test_client.get_feed_group_data(feed, group, since=since_time, next_token=next_token)
        else:
            last_token = None
            data = test_client.get_feed_group_data(feed, group, since=since_time)

        next_token = data.next_token
        logger.info('Got {} items and new next token: {}'.format(data.record_count, next_token))

        if next_token:
            assert(next_token != last_token)
        assert(len(data.data) > 0)
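
# Exercises the feed listing and group-data endpoints with the anonymous test user;
# failures surface as logged exceptions rather than assertion errors.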
def test_anon_user():
    test_client = get_client(feeds_url=feed_url,
                             user=('*****@*****.**', 'pbiU2RYZ2XrmYQ'),
                             conn_timeout=10,
                             read_timeout=30)
    for f in test_client.list_feeds().feeds:
        try:
            test_client.list_feed_groups(f.name)
        except Exception as e:
            logger.error('Caught: {} for feed: {}'.format(e, f))
    test_client.get_feed_group_data('vulnerabilities', 'alpine:3.6', since=datetime.datetime.utcnow())
Example #4
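Downloads and syncs feed data group by group: it refreshes feed metadata, filters out disabled feeds and groups, fetches each enabled group into a temporary download repository, syncs it into the database, emits start/complete/fail notification events, and returns a per-feed result list.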
    def sync(to_sync=None,
             full_flush=False,
             catalog_client=None,
             feed_client=None,
             operation_id=None):
        """
        Sync all feeds.
        :return:
        """

        result = []

        if not feed_client:
            feed_client = get_client()

        logger.info('Performing sync of feeds: {} (operation_id={})'.format(
            'all' if to_sync is None else to_sync, operation_id))

        updated, failed = DataFeeds.sync_metadata(feed_client=feed_client,
                                                  to_sync=to_sync,
                                                  operation_id=operation_id)
        updated_names = set(updated.keys())

        # Feeds configured to sync but that were not on the upstream source at all
        for feed_name in set(to_sync or []).difference(updated_names):
            failed.append((feed_name, 'Feed not found on upstream source'))

        # Build the list of feed instances to execute the syncs on
        feeds_to_sync = []
        for feed_name in updated_names:
            try:
                feeds_to_sync.append(feed_instance_by_name(feed_name))
            except KeyError as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                failed.append((feed_name, e))
            except Exception as e:
                logger.error(
                    'Could not initialize metadata for feed {}. Error: {}. (operation_id={})'
                    .format(feed_name, str(e), operation_id))
                logger.warn(
                    'Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'
                    .format(feed_name, operation_id))
                failed.append((feed_name, e))

        # Process the feeds that failed for any reason pre-data-download
        result.extend(
            DataFeeds._process_failed_feeds(failed_tuples=failed,
                                            catalog_client=catalog_client,
                                            operation_id=operation_id))

        # Sort the feed instances for the syncing process to ensure highest priority feeds sync first (e.g. vulnerabilities before package metadata)
        feeds_to_sync = _ordered_feeds(feeds_to_sync)

        # Do the fetches
        groups_to_download = []
        for f in feeds_to_sync:
            logger.info(
                'Initialized feed to sync: {} (operation_id={})'.format(
                    f.__feed_name__, operation_id))
            if f.metadata:
                if f.metadata.enabled:
                    for g in f.metadata.groups:
                        if g.enabled:
                            groups_to_download.append(g)
                        else:
                            logger.info(
                                "Will not sync/download group {} of feed {} because group is explicitly disabled"
                                .format(g.name, g.feed_name))
                else:
                    logger.info(
                        'Skipping feed {} because it is explicitly not enabled'
                        .format(f.__feed_name__))
            else:
                logger.warn(
                    'No metadata found for feed {}. Unexpected but not an error (operation_id={})'
                    .format(f.__feed_name__, operation_id))

        logger.debug('Groups to download {}'.format(groups_to_download))

        if not feed_client:
            feed_client = get_client()

        base_dir = (DataFeeds.__scratch_dir__ if DataFeeds.__scratch_dir__
                    else localconfig.get_config().get('tmp_dir'))
        download_dir = os.path.join(base_dir, 'policy_engine_tmp',
                                    'feed_syncs')

        feed_data_repo = None
        try:
            # Order by feed
            for f in feeds_to_sync:
                feed_result = build_feed_sync_results(feed=f.__feed_name__,
                                                      status='failure')
                feed_result['status'] = 'success'
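                # Optimistically mark the feed successful; a failed group flips it back to 'failure'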

                try:
                    # Feed level notification and log msg
                    notify_event(FeedSyncStarted(feed=f.__feed_name__),
                                 catalog_client,
                                 operation_id=operation_id)

                    groups_to_sync = [
                        x for x in groups_to_download
                        if x.feed_name == f.__feed_name__
                    ]
                    logger.debug('Groups to sync {}'.format(groups_to_sync))

                    # Filter groups by that feed
                    for g in groups_to_sync:

                        # Download just one group into a download result
                        group_download_config = DownloadOperationConfiguration.generate_new(
                            feed_client.feed_url, db_groups_to_sync=[g])
                        downloader = FeedDownloader(
                            download_root_dir=download_dir,
                            config=group_download_config,
                            client=feed_client,
                            fetch_all=full_flush)

                        logger.debug('Groups to download {}'.format(
                            downloader.config.groups))
                        try:
                            notify_event(FeedGroupSyncStarted(feed=g.feed_name,
                                                              group=g.name),
                                         catalog_client,
                                         operation_id=operation_id)

                            logger.info(
                                'Beginning feed data fetch (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            feed_data_repo = downloader.execute(
                                feed_name=g.feed_name, group_name=g.name)

                            logger.info(
                                'Download complete. Syncing to db (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))
                            f_result = DataFeeds.sync_from_fetched(
                                feed_data_repo,
                                catalog_client=catalog_client,
                                operation_id=operation_id,
                                full_flush=full_flush)

                            # Extract the single group record...
                            group_result = _get_group_result(f_result)

                            logger.info(
                                'DB Sync complete (feed={}, group={}, operation_id={})'
                                .format(g.feed_name, g.name, operation_id))

                            if group_result['status'] == 'success':
                                notify_event(FeedGroupSyncCompleted(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    result=group_result),
                                             catalog_client,
                                             operation_id=operation_id)
                            else:
                                # If any fails, the whole feed is marked as failed
                                feed_result['status'] = 'failure'
                                notify_event(FeedGroupSyncFailed(
                                    feed=f.__feed_name__,
                                    group=g.name,
                                    error='Failed to sync to db'),
                                             catalog_client,
                                             operation_id=operation_id)

                            feed_result['groups'].append(group_result)

                        except Exception as e:
                            logger.error(
                                'Error syncing {}/{} (operation_id={})'.format(
                                    g.feed_name, g.name, operation_id))
                            notify_event(
                                FeedGroupSyncFailed(feed=g.feed_name,
                                                    group=g.name,
                                                    error=e), catalog_client,
                                operation_id)
                            feed_result['status'] = 'failure'
                        finally:
                            try:
                                feed_data_repo.teardown()
                            except Exception:
                                logger.exception(
                                    'Could not cleanup download repo due to error'
                                )

                            feed_data_repo = None

                except Exception as e:
                    logger.error('Error syncing feed {}: {} (operation_id={})'.format(
                        f.__feed_name__, e, operation_id))

                if feed_result['status'] == 'success':
                    notify_event(FeedSyncCompleted(feed=f.__feed_name__),
                                 catalog_client, operation_id)
                else:
                    notify_event(
                        FeedSyncFailed(
                            feed=f.__feed_name__,
                            error='One or more groups failed to sync'),
                        catalog_client, operation_id)

                result.append(feed_result)
        finally:
            if feed_data_repo:
                feed_data_repo.teardown()

        return result
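
For orientation, here is a minimal, hypothetical call into the sync entry point above. It assumes sync is exposed as a staticmethod on DataFeeds (as the DataFeeds.* calls in its body suggest) and relies on the default feed client that sync builds via get_client(); the feed selection and operation id are illustrative only.

import uuid

# Hypothetical call site (not from the source): sync only the vulnerabilities feed,
# leaving catalog_client as None, which the signature permits.
results = DataFeeds.sync(to_sync=['vulnerabilities'],
                         full_flush=False,
                         catalog_client=None,
                         operation_id=uuid.uuid4().hex)
for feed_result in results:
    # Each result dict carries at least a 'status' and a 'groups' list, as built above.
    print(feed_result['status'], len(feed_result['groups']))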
Example #5
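Integration-style test of FeedDownloader: builds a download configuration for two alpine vulnerability groups, fetches them from the public ancho.re service into a temporary directory, and checks that the records read back from the repository match the per-group counts recorded in the download metadata.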
def test_feed_downloader():
    """
    Requires network access to the public feed service ancho.re

    :return:
    """

    groups_to_sync = [
        GroupDownloadOperationConfiguration(
            feed='vulnerabilities',
            group='alpine:3.7',
            parameters=GroupDownloadOperationParams(since=None)),
        GroupDownloadOperationConfiguration(
            feed='vulnerabilities',
            group='alpine:3.8',
            parameters=GroupDownloadOperationParams(since=None)),
        #GroupDownloadOperationConfiguration(feed='nvdv2', group='nvdv2:cves', parameters=GroupDownloadOperationParams(since=None))
    ]
    dl_conf = DownloadOperationConfiguration(groups=groups_to_sync,
                                             uuid=uuid.uuid4().hex,
                                             source_uri=ANCHOREIO_URI)
    tmpdir = tempfile.mkdtemp(prefix='anchoretest_repo-')
    data_repo = None
    try:
        client = get_client(ANCHOREIO_URI,
                            user=('something', 'something'),
                            conn_timeout=1,
                            read_timeout=30)
        fetcher = FeedDownloader(download_root_dir=tmpdir,
                                 config=dl_conf,
                                 client=client,
                                 fetch_all=False)

        with timer('feed download', log_level='info'):
            data_repo = fetcher.execute()

        assert data_repo is not None
        assert data_repo.root_dir.startswith(tmpdir)
        assert data_repo.metadata.data_write_dir.startswith(tmpdir)
        assert os.path.isdir(data_repo.metadata.data_write_dir)
        assert os.path.isdir(data_repo.root_dir)
        assert len(os.listdir(tmpdir)) > 0

        count = 0
        with timer('alpine 3.8 iterate', log_level='info'):
            for _ in data_repo.read('vulnerabilities', 'alpine:3.8', 0):
                count += 1

        assert count == sum([
            x.total_records for x in data_repo.metadata.download_result.results
            if x.feed == 'vulnerabilities' and x.group == 'alpine:3.8'
        ])

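        # count is deliberately not reset, so after reading alpine:3.7 it should equal the total across all downloaded groups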
        with timer('alpine 3.7 iterate', log_level='info'):
            for _ in data_repo.read('vulnerabilities', 'alpine:3.7', 0):
                count += 1

        assert count == sum([
            x.total_records for x in data_repo.metadata.download_result.results
        ])

    finally:
        logger.info('Cleaning up temp dir')
        if data_repo:
            data_repo.teardown()