Code example #1
class VulnDBFeed(AnchoreServiceFeed):
    """
    Feed for VulnDB data served from on-prem enterprise feed service
    """

    __feed_name__ = 'vulndb'
    _cve_key = 'id'
    __group_data_mappers__ = SingleTypeMapperFactory(__feed_name__, VulnDBFeedDataMapper, _cve_key)

    def _flush_group(self, group_obj, flush_helper_fn=None, operation_id=None):
        db = get_session()

        if flush_helper_fn:
            flush_helper_fn(db=db, feed_name=group_obj.feed_name, group_name=group_obj.name)

        count = db.query(VulnDBCpe).filter(VulnDBCpe.namespace_name == group_obj.name).delete()
        logger.info(log_msg_ctx(operation_id, group_obj.name, group_obj.feed_name, 'Flushed {} VulnDBCpe records'.format(count)))
        count = db.query(VulnDBMetadata).filter(VulnDBMetadata.namespace_name == group_obj.name).delete()
        logger.info(log_msg_ctx(operation_id, group_obj.name, group_obj.feed_name, 'Flushed {} VulnDBMetadata records'.format(count)))

        group_obj.last_sync = None
        group_obj.count = 0
        db.flush()

    def record_count(self, group_name, db):
        try:
            return db.query(VulnDBMetadata).filter(VulnDBMetadata.namespace_name == group_name).count()
        except Exception as e:
            logger.exception('Error getting feed data group record count in vulndb feed for group: {}'.format(group_name))
            raise
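
The _flush_group / record_count pair above boils down to two session operations scoped by namespace_name: a bulk delete that returns the number of rows removed (which is what gets logged as "Flushed {} ... records") and a plain count query. A minimal, self-contained sketch of that pattern, assuming SQLAlchemy 1.4+, an in-memory SQLite engine, and a hypothetical Cpe model rather than anchore-engine's real VulnDBCpe/VulnDBMetadata models and get_session plumbing:

# Sketch only: hypothetical Cpe model and throwaway SQLite engine stand in
# for anchore-engine's actual models and session handling.
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()


class Cpe(Base):
    __tablename__ = "cpes"
    id = Column(Integer, primary_key=True)
    namespace_name = Column(String, nullable=False)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
db = sessionmaker(bind=engine)()

db.add_all([Cpe(namespace_name="vulndb:vulnerabilities") for _ in range(3)])
db.commit()


def flush_group(db, group_name):
    # Query.delete() returns the number of rows removed, mirroring the
    # "Flushed {} ... records" counts logged by _flush_group above.
    count = db.query(Cpe).filter(Cpe.namespace_name == group_name).delete()
    db.flush()
    return count


def record_count(db, group_name):
    return db.query(Cpe).filter(Cpe.namespace_name == group_name).count()


print(record_count(db, "vulndb:vulnerabilities"))  # 3
print(flush_group(db, "vulndb:vulnerabilities"))   # 3
print(record_count(db, "vulndb:vulnerabilities"))  # 0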
Code example #2
class NvdV2Feed(AnchoreServiceFeed):
    """
    Feed for NVD v2 vulnerability (CVE) data, served from the anchore feed service backend
    """

    __feed_name__ = 'nvdv2'
    _cve_key = 'id'
    __group_data_mappers__ = SingleTypeMapperFactory(__feed_name__, NvdV2FeedDataMapper, _cve_key)

    def _flush_group(self, group_obj, flush_helper_fn=None, operation_id=None):
        db = get_session()
        if flush_helper_fn:
            flush_helper_fn(db=db, feed_name=group_obj.feed_name, group_name=group_obj.name)

        count = db.query(CpeV2Vulnerability).filter(CpeV2Vulnerability.namespace_name == group_obj.name).delete()
        logger.info(log_msg_ctx(operation_id, group_obj.name, group_obj.feed_name, 'Flushed {} CpeV2Vuln records'.format(count)))
        count = db.query(NvdV2Metadata).filter(NvdV2Metadata.namespace_name == group_obj.name).delete()
        logger.info(log_msg_ctx(operation_id, group_obj.name, group_obj.feed_name, 'Flushed {} NvdV2 records'.format(count)))

        group_obj.last_sync = None
        group_obj.count = 0
        db.flush()

    def record_count(self, group_name, db):
        try:
            return db.query(NvdV2Metadata).filter(NvdV2Metadata.namespace_name == group_name).count()
        except Exception as e:
            logger.exception('Error getting feed data group record count in nvdv2 feed for group: {}'.format(group_name))
            raise
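
NvdV2Feed is structurally identical to VulnDBFeed: only the feed name, the mapper class, and the ORM models being flushed and counted change. Both wire their record mapping through the __group_data_mappers__ class attribute, where SingleTypeMapperFactory hands out the same mapper type for whichever group is requested. A rough, hypothetical stand-in for that factory (not anchore-engine's actual implementation) to show the idea:

# Hypothetical, simplified stand-in for a single-type mapper factory:
# whichever group name is requested, it hands back an instance of the one
# configured mapper class, bound to the feed/group/key it was built with.
class FakeMapper:
    def __init__(self, feed_name, group_name, keyfield):
        self.feed_name = feed_name
        self.group_name = group_name
        self.keyfield = keyfield

    def map(self, record):
        # Trivial mapping: pull the configured key field out of a raw feed record.
        return {"key": record[self.keyfield], "raw": record}


class SingleTypeMapperFactorySketch:
    def __init__(self, feed_name, mapper_cls, keyfield):
        self.feed_name = feed_name
        self.mapper_cls = mapper_cls
        self.keyfield = keyfield

    def __getitem__(self, group_name):
        return self.mapper_cls(self.feed_name, group_name, self.keyfield)


mappers = SingleTypeMapperFactorySketch("vulndb", FakeMapper, "id")
mapper = mappers["vulndb:vulnerabilities"]
print(mapper.map({"id": "VULNDB-12345", "severity": "High"}))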
Code example #3
File: feeds.py  Project: marvel-works/anchore-engine
class GithubFeed(VulnerabilityFeed):
    """
    Feed for the Github Advisories data
    """

    __feed_name__ = "github"
    _cve_key = "id"
    __group_data_mappers__ = SingleTypeMapperFactory(
        __feed_name__, GithubFeedDataMapper, _cve_key
    )
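
GithubFeed adds no behavior of its own; it only overrides the class-level configuration (__feed_name__, _cve_key, __group_data_mappers__) that the base feed classes consult when syncing. The pattern in isolation, with a hypothetical minimal base class standing in for the real AnchoreServiceFeed / VulnerabilityFeed:

# Hypothetical minimal version of the configure-by-class-attribute pattern:
# the base class implements the workflow and reads per-feed settings from
# attributes that subclasses override.
class BaseFeed:
    __feed_name__ = None
    _cve_key = None

    def describe(self):
        return "feed={} keyed by {}".format(self.__feed_name__, self._cve_key)


class GithubFeedSketch(BaseFeed):
    __feed_name__ = "github"
    _cve_key = "id"


class VulnerabilitiesFeedSketch(BaseFeed):
    __feed_name__ = "vulnerabilities"
    _cve_key = "Name"


print(GithubFeedSketch().describe())           # feed=github keyed by id
print(VulnerabilitiesFeedSketch().describe())  # feed=vulnerabilities keyed by Name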
Code example #4
File: feeds.py  Project: marvel-works/anchore-engine
class VulnerabilityFeed(AnchoreServiceFeed):
    """
    Vulnerabilities feed from anchore feed service backend. Unique in that the records are nested and have structure.
    Each vulnerability record maps to a set of records in the DB: one for the vulnerability and a set for each of the FixedIn and
    VulnerableIn collections that are optionally present for the vulnerability main record.

    """

    __feed_name__ = "vulnerabilities"
    _cve_key = "Name"
    __group_data_mappers__ = SingleTypeMapperFactory(
        __feed_name__, VulnerabilityFeedDataMapper, _cve_key
    )
    __vuln_processing_fn__ = process_updated_vulnerability
    __flush_helper_fn__ = flush_vulnerability_matches

    def _sync_group(
        self,
        group_download_result: GroupDownloadResult,
        full_flush=False,
        local_repo=None,
        operation_id=None,
    ):
        """
        Sync data from a single group and return the data. This operation is scoped to a transaction on the db.

        :param group_download_result
        :return:
        """
        total_updated_count = 0
        result = build_group_sync_result()
        result["group"] = group_download_result.group
        sync_started = None

        db = get_session()
        db.refresh(self.metadata)
        group_db_obj = self.group_by_name(group_download_result.group)

        if not group_db_obj:
            logger.error(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Skipping group sync. Record not found in db, should have been synced already",
                )
            )
            return result

        sync_started = time.time()
        download_started = group_download_result.started.replace(
            tzinfo=datetime.timezone.utc
        )

        try:
            updated_images = (
                set()
            )  # To get unique set of all images updated by this sync

            if full_flush:
                logger.info(
                    log_msg_ctx(
                        operation_id,
                        group_download_result.feed,
                        group_download_result.group,
                        "Performing group data flush prior to sync",
                    )
                )
                self._flush_group(group_db_obj, operation_id=operation_id)

            mapper = self._load_mapper(group_db_obj)

            # Iterate thru the records and commit
            count = 0
            for record in local_repo.read(
                group_download_result.feed, group_download_result.group, 0
            ):
                mapped = mapper.map(record)
                updated_image_ids = self.update_vulnerability(
                    db,
                    mapped,
                    vulnerability_processing_fn=VulnerabilityFeed.__vuln_processing_fn__,
                )
                updated_images = updated_images.union(
                    set(updated_image_ids)
                )  # Record after commit to ensure in-sync.
                merged = db.merge(mapped)
                total_updated_count += 1
                count += 1

                if len(updated_image_ids) > 0:
                    db.flush()  # Flush after every one so that mem footprint stays small if lots of images are updated

                if count >= self.RECORDS_PER_CHUNK:
                    # Commit
                    group_db_obj.count = self.record_count(group_db_obj.name, db)
                    db.commit()
                    logger.info(
                        log_msg_ctx(
                            operation_id,
                            group_download_result.feed,
                            group_download_result.group,
                            "DB Update Progress: {}/{}".format(
                                total_updated_count, group_download_result.total_records
                            ),
                        )
                    )
                    db = get_session()
                    count = 0

            else:
                group_db_obj.count = self.record_count(group_db_obj.name, db)
                db.commit()
                logger.info(
                    log_msg_ctx(
                        operation_id,
                        group_download_result.feed,
                        group_download_result.group,
                        "DB Update Progress: {}/{}".format(
                            total_updated_count, group_download_result.total_records
                        ),
                    )
                )
                db = get_session()

            logger.debug(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Updating last sync timestamp to {}".format(download_started),
                )
            )
            group_db_obj = self.group_by_name(group_download_result.group)
            group_db_obj.last_sync = download_started
            group_db_obj.count = self.record_count(group_db_obj.name, db)
            db.add(group_db_obj)
            db.commit()
        except Exception as e:
            logger.exception(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Error syncing group",
                )
            )
            db.rollback()
            raise e
        finally:
            total_group_time = time.time() - download_started.timestamp()
            sync_time = time.time() - sync_started
            logger.info(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Sync to db duration: {} sec".format(sync_time),
                )
            )
            logger.info(
                log_msg_ctx(
                    operation_id,
                    group_download_result.feed,
                    group_download_result.group,
                    "Total sync, including download, duration: {} sec".format(
                        total_group_time
                    ),
                )
            )

        result["updated_record_count"] = total_updated_count
        result["status"] = "success"
        result["total_time_seconds"] = total_group_time
        result["updated_image_count"] = 0
        return result

    @staticmethod
    def _are_match_equivalent(vulnerability_a, vulnerability_b):
        """
        Returns true if the two records (including child fixedin and/or vulnerablein records) are equivalent in terms of package matching.

        TODO: move this logic to a vuln-scan abstraction, but that abstraction needs more work before it's ready. Would like to keep the definition of what impacts matches centralized so as not to get out-of-sync.

        :param vulnerability_a:
        :param vulnerability_b:
        :return:
        """

        if (
            not (vulnerability_a and vulnerability_b)
            or vulnerability_a.id != vulnerability_b.id
            or vulnerability_a.namespace_name != vulnerability_b.namespace_name
        ):
            # They aren't the same item reference
            logger.debug(
                "Vuln id or namespaces are different: {} {} {} {}".format(
                    vulnerability_a.id,
                    vulnerability_b.id,
                    vulnerability_a.namespace_name,
                    vulnerability_b.namespace_name,
                )
            )
            return False

        normalized_fixes_a = {
            (fix.name, fix.epochless_version, fix.version)
            for fix in vulnerability_a.fixed_in
        }
        normalized_fixes_b = {
            (fix.name, fix.epochless_version, fix.version)
            for fix in vulnerability_b.fixed_in
        }

        fix_diff = normalized_fixes_a.symmetric_difference(normalized_fixes_b)
        if fix_diff:
            logger.debug("Fixed In records diff: {}".format(fix_diff))
            return False

        return True

    def update_vulnerability(
        self, db, vulnerability_record, vulnerability_processing_fn=None
    ):
        """
        Processes a single vulnerability record. Specifically for vulnerabilities:
        Checks and updates any fixed-in or vulnerable-in records and, given the final state of the vulnerability,
        calls the item_callback function which is expected to do things like: update image vulnerability lists based
        on the new item.

        :param vulnerability_record: the record from the feed source to process and load into the db.
        :param vulnerability_processing_fn: a callback function to execute with the new data, but before any transaction commit
        :return:
        """
        try:
            updates = []

            try:
                existing = (
                    db.query(Vulnerability)
                    .filter(
                        Vulnerability.id == vulnerability_record.id,
                        Vulnerability.namespace_name
                        == vulnerability_record.namespace_name,
                    )
                    .one_or_none()
                )
            except:
                logger.debug(
                    "No current record found for {}".format(vulnerability_record)
                )
                existing = None

            if existing:
                needs_update = not VulnerabilityFeed._are_match_equivalent(
                    existing, vulnerability_record
                )
                if needs_update:
                    logger.debug(
                        "Found update that requires an image match update from {} to {}".format(
                            existing, vulnerability_record
                        )
                    )
            else:
                needs_update = True

            merged = db.merge(vulnerability_record)

            if vulnerability_processing_fn and needs_update:
                updates = vulnerability_processing_fn(db, merged)
            else:
                logger.debug(
                    "Skipping image processing due to no diff: {}".format(merged)
                )

            return updates
        except Exception as e:
            logger.exception("Error in vulnerability processing")
            raise e

    def _flush_group(self, group_obj, operation_id=None):
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushing group records",
            )
        )

        db = get_session()

        VulnerabilityFeed.__flush_helper_fn__(
            db=db, feed_name=group_obj.feed_name, group_name=group_obj.name
        )

        count = (
            db.query(FixedArtifact)
            .filter(FixedArtifact.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} fix records".format(count),
            )
        )
        count = (
            db.query(VulnerableArtifact)
            .filter(VulnerableArtifact.namespace_name == group_obj.name)
            .delete()
        )
        logger.info("Flushed %s vuln artifact records", count)
        count = (
            db.query(Vulnerability)
            .filter(Vulnerability.namespace_name == group_obj.name)
            .delete()
        )
        logger.info(
            log_msg_ctx(
                operation_id,
                group_obj.name,
                group_obj.feed_name,
                "Flushed {} vulnerability records".format(count),
            )
        )
        group_obj.last_sync = None  # Null the update timestamp to reflect the flush
        group_obj.count = 0

        db.flush()

    def sync(
        self,
        fetched_data: LocalFeedDataRepo,
        full_flush: bool = False,
        event_client: CatalogClient = None,
        operation_id=None,
        group=None,
    ) -> dict:
        """
        Sync data with the feed source. This may be *very* slow if there are lots of updates.

        Returns a dict with the following structure:
        {
        'group_name': [ record1, record2, ..., recordN],
        'group_name2': [ record1, record2, ...., recordM],
        ...
        }

        :param: group: The group to sync, optionally. If not specified, all groups are synced.
        :return: changed data updated in the sync as a list of records
        """

        if self.metadata and self.metadata.groups:
            # Setup the group name cache
            ThreadLocalFeedGroupNameCache.add(
                [(x.name, x.enabled) for x in self.metadata.groups]
            )
        else:
            ThreadLocalFeedGroupNameCache.flush()

        try:
            return super().sync(
                fetched_data,
                full_flush,
                event_client,
                operation_id=operation_id,
                group=group,
            )
        finally:
            ThreadLocalFeedGroupNameCache.flush()

    def record_count(self, group_name, db):
        try:
            return (
                db.query(Vulnerability)
                .filter(Vulnerability.namespace_name == group_name)
                .count()
            )
        except Exception as e:
            logger.exception(
                "Error getting feed data group record count in package feed for group: {}".format(
                    group_name
                )
            )
            raise
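
The heart of _sync_group above is the chunked commit loop: each record is mapped and merged, the session is committed every RECORDS_PER_CHUNK records, and the for/else branch commits whatever partial chunk remains once the iterator is exhausted. Stripped of the feed- and SQLAlchemy-specific details, the control flow looks roughly like this (in-memory lists standing in for the session, purely illustrative):

# Rough sketch of the chunked-commit control flow used in _sync_group,
# with in-memory lists standing in for the SQLAlchemy session.
RECORDS_PER_CHUNK = 3
records = ["CVE-%04d" % i for i in range(8)]

pending = []      # records merged since the last commit
committed = []    # stand-in for rows persisted by db.commit()
total = 0

count = 0
for record in records:
    pending.append(record)    # analogous to db.merge(mapped)
    total += 1
    count += 1

    if count >= RECORDS_PER_CHUNK:
        committed.extend(pending)   # analogous to db.commit()
        pending = []
        print("DB Update Progress: {}/{}".format(total, len(records)))
        count = 0
else:
    # The for/else branch runs when the loop finishes without a break,
    # committing the final partial chunk (2 records in this example).
    committed.extend(pending)
    pending = []
    print("DB Update Progress: {}/{}".format(total, len(records)))

assert committed == records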