Example #1
    def refresh_groups(self):
        group_list = self.source.list_feed_groups(self.__feed_name__)

        for group in group_list:
            my_group = self.group_by_name(group.name)
            if not my_group:
                # Passing feed=self.metadata attaches the new group to the
                # parent record via the SQLAlchemy relationship, so no
                # explicit session add is needed here.
                g = FeedGroupMetadata(name=group.name,
                                      description=group.description,
                                      access_tier=group.access_tier,
                                      feed=self.metadata)
                g.last_sync = None
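Note that refresh_groups() never calls db.add(g): assigning feed=self.metadata hooks the new group into the parent's groups collection, and SQLAlchemy's default save-update cascade persists it along with the parent. A minimal, self-contained sketch of that behavior, using hypothetical toy models rather than the project's actual declarations:

from sqlalchemy import Column, ForeignKey, String, create_engine
from sqlalchemy.orm import Session, declarative_base, relationship

Base = declarative_base()


class Feed(Base):
    # Hypothetical stand-in for FeedMetadata
    __tablename__ = "feeds"
    name = Column(String, primary_key=True)
    groups = relationship("Group", back_populates="feed")


class Group(Base):
    # Hypothetical stand-in for FeedGroupMetadata
    __tablename__ = "groups"
    name = Column(String, primary_key=True)
    feed_name = Column(String, ForeignKey("feeds.name"))
    feed = relationship("Feed", back_populates="groups")


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    feed = Feed(name="vulnerabilities")
    session.add(feed)
    # No session.add() for the group: assigning feed= wires it into
    # feed.groups, and the save-update cascade persists it on commit.
    group = Group(name="debian:8", feed=feed)
    session.commit()
    assert session.get(Group, "debian:8") is not None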
Example #2
    def _sync_feed_group_metadata(
        db: Session,
        feed_api_record: Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]],
        db_feeds: Dict[str, FeedMetadata],
        operation_id: Optional[str] = None,
    ) -> None:
        """
        Add FeedGroupMetadata records to DB if they don't already exist

        :param db: database session
        :type db: Session
        :param feed_api_record: data from API client
        :type feed_api_record: Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]]
        :param db_feeds: map of feed names to FeedMetadata tied to DB session
        :type db_feeds: Dict[str, FeedMetadata]
        :param operation_id: UUID4 hexadecimal string
        :type operation_id: Optional[str]
        """
        api_feed = feed_api_record["meta"]
        db_feed = db_feeds.get(api_feed.name)
        # Check for any update
        db_feed.description = api_feed.description
        db_feed.access_tier = api_feed.access_tier

        db_groups = {x.name: x for x in db_feed.groups}
        for api_group in feed_api_record.get("groups", []):
            db_group = db_groups.get(api_group.name)
            # Do this instead of a db.merge() to ensure no timestamps are reset or overwritten
            if not db_group:
                logger.debug(
                    "Adding new feed group metadata record to db: {} (operation_id={})".format(
                        api_group.name, operation_id
                    )
                )
                db_group = FeedGroupMetadata(
                    name=api_group.name,
                    description=api_group.description,
                    access_tier=api_group.access_tier,
                    feed=db_feed,
                    enabled=True,
                )
                db_group.last_sync = None
                db.add(db_group)
            else:
                logger.debug(
                    "Feed group metadata already in db: {} (operation_id={})".format(
                        api_group.name, operation_id
                    )
                )

            db_group.access_tier = api_group.access_tier
            db_group.description = api_group.description
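The feed_api_record mapping mirrors what the API client returns: a "meta" key holding the FeedAPIRecord and a "groups" key holding the group records. A hypothetical invocation sketch; the values are illustrative, and an open Session db is assumed (FeedAPIRecord/FeedAPIGroupRecord constructor shapes follow the test data further down this page):

import uuid

feed_api_record = {
    "meta": FeedAPIRecord(
        name="vulnerabilities",
        description="OS vulnerability data",
        access_tier=0,
    ),
    "groups": [
        FeedAPIGroupRecord(
            name="debian:10",
            description="Debian 10 vuln data",
            access_tier=0,
        )
    ],
}
db_feeds = {f.name: f for f in get_all_feeds(db)}  # map feed name -> FeedMetadata
_sync_feed_group_metadata(db, feed_api_record, db_feeds, operation_id=uuid.uuid4().hex)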
Example #3
def mock_feed_metadata(anchore_db):
    """
    Fixture for delivering mock feed and feed group metadata for metadata ops

    :param anchore_db:
    :return:
    """
    feed_names = []
    with session_scope() as db:
        for f in mock_feeds:
            feed_names.append(f['name'])
            feed = FeedMetadata()
            feed.name = f['name']
            feed.description = f['description']
            feed.enabled = True
            feed.access_tier = 0
            feed.groups = []

            for grp in f['groups']:
                g = FeedGroupMetadata()
                g.name = grp['name']
                g.access_tier = 0
                g.description = ''
                g.enabled = True
                g.feed_name = feed.name
                feed.groups.append(g)

            # Persist the feed; its groups follow via the relationship cascade
            db.add(feed)

    return feed_names
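A hypothetical test built on this fixture; the query shape is assumed, not taken from the project's test suite:

def test_mock_feed_metadata_persists_feeds(mock_feed_metadata):
    with session_scope() as db:
        persisted = {f.name for f in db.query(FeedMetadata).all()}
    assert persisted == set(mock_feed_metadata)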
    def get_groups_to_download(
        source_feeds: Dict[
            str, Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]]
        ],
        feeds_to_sync: List[DataFeed],
        operation_id: str,
    ) -> List[FeedGroupMetadata]:
        """
        Creates a FeedGroupMetadata record that is never added to the database. We purposefully avoid adding the feed
        attribute to the record so that this record does not get created implicitly by sqlalchemy back-population.
        Uses the FeedMetadata from feeds_to_sync and expects exactly one record to be present for grypedb.

        :param source_feeds: mapping containing FeedAPIRecord and FeedAPIGroupRecord
        :type source_feeds: Dict[str, Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]]]
        :param feeds_to_sync: ordered list of DataFeed(s) to sync
        :type feeds_to_sync: List[DataFeed]
        :param operation_id: UUID4 hexadecimal string
        :type operation_id: str
        :return: list of FeedGroupMetadata records to download
        :rtype: List[FeedGroupMetadata]
        """
        # TODO consider throwing exceptions if length is not 1 for these
        api_feed_group = source_feeds[GRYPE_DB_FEED_NAME]["groups"][0]
        feed_metadata = feeds_to_sync[0].metadata
        groups_to_download = []
        if feed_metadata.enabled:
            groups_to_download.append(
                FeedGroupMetadata(
                    name=api_feed_group.name,
                    feed_name=feed_metadata.name,
                    description=api_feed_group.description,
                    access_tier=api_feed_group.access_tier,
                    enabled=True,
                )
            )
        else:
            logger.info(
                "Will not sync/download feed %s because feed is explicitly disabled",
                feed_metadata.name,
            )
        return groups_to_download
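To make the docstring's back-population point concrete: with a standard SQLAlchemy relationship (as the other snippets on this page suggest these models use), the two constructor styles behave differently. A hypothetical sketch, not the project's code:

# Plain column assignment: the string is just data; the new object is
# transient and is never flushed unless explicitly db.add()-ed.
detached_group = FeedGroupMetadata(
    name="grypedb:vulnerabilities",
    feed_name="grypedb",
)

# Relationship assignment: back-population appends the object to
# db_feed.groups, and the save-update cascade would persist it with
# the (session-bound) parent on the next flush/commit.
attached_group = FeedGroupMetadata(
    name="grypedb:vulnerabilities",
    feed=db_feed,  # hypothetical session-bound FeedMetadata
)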
Example #6
    def sync_metadata(feed_client: IFeedSource,
                      to_sync: list = None,
                      operation_id=None) -> tuple:
        """
        Get metadata from source and sync db metadata records to that (e.g. add any new groups or feeds)
        Executes as a unit-of-work for db, so will commit result and returns the records found on upstream source.

        If a record exists in db but was not found upstream, it is not returned

        :param feed_client:
        :param to_sync: list of string feed names to sync metadata on
        :return: tuple, first element: dict of names mapped to db records post-sync only including records successfully updated by upstream, second element is a list of tuples where each tuple is (failed_feed_name, error_obj)
        """

        if not to_sync:
            return {}, []

        db = get_session()
        try:
            logger.info(
                'Syncing feed and group metadata from upstream source (operation_id={})'
                .format(operation_id))

            source_resp = feed_client.list_feeds()
            if to_sync:
                feeds = filter(lambda x: x.name in to_sync, source_resp.feeds)
            else:
                feeds = []

            failed = []
            source_feeds = {
                x.name: {
                    'meta': x,
                    'groups': feed_client.list_feed_groups(x.name).groups
                }
                for x in feeds
            }
            logger.debug('Upstream feeds available: %s', source_feeds)
            db_feeds = DataFeeds._pivot_and_filter_feeds_by_config(
                to_sync, list(source_feeds.keys()), get_all_feeds(db))

            for feed_name, feed_api_record in source_feeds.items():
                try:
                    logger.info(
                        'Syncing metadata for feed: {} (operation_id={})'.
                        format(feed_name, operation_id))

                    api_feed = feed_api_record['meta']
                    db_feed = db_feeds.get(api_feed.name)

                    # Do this instead of a db.merge() to ensure no timestamps are reset or overwritten
                    if not db_feed:
                        logger.debug(
                            'Adding new feed metadata record to db: {} (operation_id={})'
                            .format(api_feed.name, operation_id))
                        db_feed = FeedMetadata(
                            name=api_feed.name,
                            description=api_feed.description,
                            access_tier=api_feed.access_tier,
                            enabled=True)
                        db.add(db_feed)
                        db.flush()
                    else:
                        logger.debug(
                            'Feed metadata already in db: {} (operation_id={})'
                            .format(api_feed.name, operation_id))

                    # Check for any update
                    db_feed.description = api_feed.description
                    db_feed.access_tier = api_feed.access_tier

                    db_groups = {x.name: x for x in db_feed.groups}
                    for api_group in feed_api_record.get('groups', []):
                        db_group = db_groups.get(api_group.name)
                        # Do this instead of a db.merge() to ensure no timestamps are reset or overwritten
                        if not db_group:
                            logger.debug(
                                'Adding new feed group metadata record to db: {} (operation_id={})'
                                .format(api_group.name, operation_id))
                            db_group = FeedGroupMetadata(
                                name=api_group.name,
                                description=api_group.description,
                                access_tier=api_group.access_tier,
                                feed=db_feed,
                                enabled=True)
                            db_group.last_sync = None
                            db.add(db_group)
                        else:
                            logger.debug(
                                'Feed group metadata already in db: {} (operation_id={})'
                                .format(api_group.name, operation_id))

                        db_group.access_tier = api_group.access_tier
                        db_group.description = api_group.description
                except Exception as e:
                    logger.exception('Error syncing feed {}'.format(feed_name))
                    logger.warn(
                        'Could not sync metadata for feed: {} (operation_id={})'
                        .format(feed_name, operation_id))
                    failed.append((feed_name, e))
                finally:
                    db.flush()

            # Reload
            db_feeds = DataFeeds._pivot_and_filter_feeds_by_config(
                to_sync, list(source_feeds.keys()), get_all_feeds(db))

            db.commit()
            logger.info(
                'Metadata sync from feeds upstream source complete (operation_id={})'
                .format(operation_id))
            return db_feeds, failed
        except Exception as e:
            logger.error(
                'Rolling back feed metadata update due to error: {} (operation_id={})'
                .format(e, operation_id))
            db.rollback()
            raise
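A hypothetical call sketch for this method; sync_metadata is assumed to be a static method on DataFeeds (which the _pivot_and_filter_feeds_by_config calls suggest), and client construction is elided:

import uuid

# `client` is assumed to be an already constructed IFeedSource
# implementation, e.g. a FeedServiceClient.
updated, failures = DataFeeds.sync_metadata(
    feed_client=client,
    to_sync=['vulnerabilities', 'github'],
    operation_id=uuid.uuid4().hex,
)
for feed_name, err in failures:
    logger.warn('metadata sync failed for %s: %s', feed_name, err)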
Example #7
mock_feeds = [
    FeedMetadata(name='vulnerabilities',
                 description='Test version of vulnerabilities feed',
                 access_tier=0,
                 enabled=True),
    FeedMetadata(name='github',
                 description='Test version of github feed',
                 access_tier=0,
                 enabled=True)
]

mock_feed_groups = [
    FeedGroupMetadata(name='debian:8',
                      feed_name='vulnerabilities',
                      enabled=True,
                      description='Fake debian 8 vuln data',
                      access_tier=0),
    FeedGroupMetadata(name='debian:9',
                      feed_name='vulnerabilities',
                      enabled=True,
                      description='Fake debian 9 vuln data',
                      access_tier=0),
    FeedGroupMetadata(name='github:pip',
                      feed_name='github',
                      enabled=True,
                      description='Github python/pip data',
                      access_tier=0)
]

mock_vulnerabilities = [
Example #8
class TestSyncUtilProvider:
    @pytest.mark.parametrize(
        "sync_util_provider, sync_configs, expected_to_sync_after_filtering",
        [
            (
                LegacySyncUtilProvider,
                {
                    "packages": SyncConfig(url="www.anchore.com", enabled=True)
                },
                ["packages"],
            ),
            (
                LegacySyncUtilProvider,
                {
                    "nvdv2":
                    SyncConfig(url="www.anchore.com", enabled=True),
                    "vulnerabilities":
                    SyncConfig(url="www.anchore.com", enabled=True),
                },
                ["nvdv2", "vulnerabilities"],
            ),
            (
                GrypeDBSyncUtilProvider,
                {
                    "grypedb": SyncConfig(url="www.anchore.com", enabled=True)
                },
                ["grypedb"],
            ),
            (
                GrypeDBSyncUtilProvider,
                {
                    "grypedb": SyncConfig(url="www.anchore.com", enabled=True),
                    "packages": SyncConfig(url="www.anchore.com",
                                           enabled=True),
                },
                ["grypedb"],
            ),
        ],
    )
    def test_get_filtered_sync_configs(
        self,
        sync_util_provider: Type[SyncUtilProvider],
        sync_configs: Dict[str, SyncConfig],
        expected_to_sync_after_filtering: List[str],
    ):
        """
        This is a bit confusing and probably should be changed, which is why i've written a test for it.
        There are two SyncUtilProviders.
        The LegacySyncUtilProvider works for all feeds that follow the legacy format.
        The GrypeDBSyncUtilProvider works for the GrypeDB feed format.
        However, the VulnerabilitiesProvider has two implementations.
        The LegacyProvider contains all vulnerability logic that changes when the provider is set to "legacy"
        The GrypeProvider contains all vulnerability logic that changes when the provider is set to "grype"
        As such, the GrypeProvider actually returns both "packages" and "grypedb" SyncConfigs,
        while "packages" is actually a Legacy style feed.
        Meanwhile, the "packages" feed can only be synced by the LegacySyncUtilProvider.
        The solution is likely to wrap the entire sync method with the SyncUtilProvider, that way LegacySyncUtilProvider
        can just do legacy feeds, while GrypeDBSyncUtilProvider will first do "grypedb" feed with the grype logic
        and then do "packages" feed with the legacy logic.
        """
        filtered_configs = sync_util_provider._get_filtered_sync_configs(
            sync_configs)
        assert set(filtered_configs) == set(expected_to_sync_after_filtering)

    @pytest.mark.parametrize(
        "sync_util_provider, sync_configs, expected_client_class",
        [
            (
                LegacySyncUtilProvider,
                {
                    "vulnerabilities":
                    SyncConfig(url="www.anchore.com", enabled=True)
                },
                FeedServiceClient,
            ),
            (
                GrypeDBSyncUtilProvider,
                {
                    "grypedb": SyncConfig(url="www.anchore.com", enabled=True)
                },
                GrypeDBServiceClient,
            ),
        ],
    )
    def test_get_client(
        self,
        sync_util_provider: Type[SyncUtilProvider],
        sync_configs: Dict[str, SyncConfig],
        expected_client_class: Type[IFeedSource],
    ):
        client = sync_util_provider(sync_configs).get_client()
        assert isinstance(client, expected_client_class)

    @pytest.mark.parametrize(
        "metadata, expected_number_groups, expected_feed_group_metadata",
        [
            (
                FeedMetadata(name="grypedb", enabled=True),
                1,
                FeedGroupMetadata(name="grypedb:vulnerabilities",
                                  feed_name="grypedb",
                                  enabled=True),
            ),
            (FeedMetadata(name="grypedb", enabled=False), 0, None),
        ],
    )
    def test_get_groups_to_download_grype(
        self,
        metadata: FeedMetadata,
        expected_number_groups: int,
        expected_feed_group_metadata: Optional[FeedGroupMetadata],
    ):
        source_feeds = {
            "grypedb": {
                "meta":
                FeedList(feeds=[
                    FeedAPIRecord(
                        name="grypedb",
                        description="grypedb feed",
                        access_tier="0",
                    )
                ]),
                "groups": [
                    FeedAPIGroupRecord(
                        name="grypedb:vulnerabilities",
                        description="grypedb:vulnerabilities group",
                        access_tier="0",
                        grype_listing=GrypeDBListing(
                            built=anchore_now_datetime(),
                            version="2",
                            url="www.anchore.com",
                            checksum="sha256:xxx",
                        ),
                    )
                ],
            }
        }
        feeds_to_sync = [GrypeDBFeed(metadata=metadata)]
        sync_config = {
            "grypedb": SyncConfig(enabled=True, url="www.anchore.com")
        }
        groups_to_download = GrypeDBSyncUtilProvider(
            sync_config).get_groups_to_download(source_feeds, feeds_to_sync,
                                                "0")
        assert len(groups_to_download) == expected_number_groups
        if expected_number_groups > 0:
            group = groups_to_download[0]
            assert group.enabled == expected_feed_group_metadata.enabled
            assert group.feed_name == expected_feed_group_metadata.feed_name
            assert group.name == expected_feed_group_metadata.name

    def test_get_groups_to_download_legacy(self):
        feed_group_metadata = [
            FeedGroupMetadata(name="vulnerabilities:alpine:3.10",
                              enabled=True),
            FeedGroupMetadata(name="vulnerabilities:alpine:3.11",
                              enabled=True),
        ]
        feeds_to_sync = [
            VulnerabilityFeed(metadata=FeedMetadata(
                name="vulnerabilities",
                enabled=True,
                groups=feed_group_metadata,
            ))
        ]
        sync_config = {
            "vulnerabilities": SyncConfig(enabled=True, url="www.anchore.com")
        }
        groups_to_download = LegacySyncUtilProvider(
            sync_config).get_groups_to_download({}, feeds_to_sync, "0")
        assert groups_to_download == feed_group_metadata
Example #9
mock_feeds = [
    FeedMetadata(
        name="vulnerabilities",
        description="Test version of vulnerabilities feed",
        access_tier=0,
        enabled=True,
    ),
    FeedMetadata(
        name="github",
        description="Test version of github feed",
        access_tier=0,
        enabled=True,
    ),
]

mock_feed_groups = [
    FeedGroupMetadata(
        name="debian:8",
        feed_name="vulnerabilities",
        enabled=True,
        description="Fake debian 8 vuln data",
        access_tier=0,
    ),
    FeedGroupMetadata(
        name="debian:9",
        feed_name="vulnerabilities",
        enabled=True,
        description="Fake debian 9 vuln data",
        access_tier=0,
    ),
    FeedGroupMetadata(
        name="github:pip",
        feed_name="github",
        enabled=True,
        description="Github python/pip data",