Beispiel #1
0
    def store(self):
        """Sync all queued repositories.

        Repositories are processed in batches due to disk space and memory usage.
        The repodata of a batch is cleaned up even when downloading, unpacking or
        storing that batch raises; the exception still propagates to the caller.
        """
        self.logger.log("Checking %d repositories." % len(self.repositories))

        # Fetch current list of repositories from DB
        self.db_repositories = self.repo_store.list_repositories()

        # Download all repomd files first
        failed = self._download_repomds()
        self.logger.log("%d repomd.xml files failed to download." % len(failed))
        self._read_repomds(failed)

        # Filter all repositories without repomd attribute set (failed download, downloaded repomd is not newer)
        batches = BatchList()
        to_skip = []
        for repository in self.repositories:
            if repository.repomd:
                batches.add_item(repository)
            else:
                to_skip.append(repository)
        self.clean_repodata(to_skip)
        self.logger.log("%d repositories skipped." % len(to_skip))
        self.logger.log("Syncing %d repositories." % sum(len(batch) for batch in batches))

        # Download and process repositories in batches (unpacked metadata files can consume lot of disk space)
        for batch in batches:
            try:
                self._download_metadata(batch)
                self._unpack_metadata(batch)
                for repository in batch:
                    repository.load_metadata()
                    self.repo_store.store(repository)
                    repository.unload_metadata()
            finally:
                # Always release the batch's files on disk, otherwise a failed
                # batch would leave its downloaded/unpacked metadata behind.
                self.clean_repodata(batch)
Beispiel #2
0
    def store(self):
        """Sync every queued CVE list, working batch by batch to limit disk space and memory usage."""
        self.logger.info("Checking %d CVE lists.", len(self.repos))

        # Meta files are downloaded up front, before any batch is processed
        failed_downloads = self._download_meta()
        if failed_downloads:
            FAILED_NIST.inc()
            self.logger.warning("%d meta files failed to download.", len(failed_downloads))
        self._read_meta(failed_downloads)

        # Lists without meta set (failed / unchanged) are skipped, all others are queued in batches
        pending = BatchList()
        skipped = []
        for cve_list in self.repos:
            if not cve_list.meta:
                skipped.append(cve_list)
                continue
            pending.add_item(cve_list)
        self.clean_repo(skipped)
        self.logger.info("%d CVE lists skipped.", len(skipped))
        self.logger.info("Syncing %d CVE lists.", sum(map(len, pending)))

        # Download and process CVE lists in batches (unpacked files can consume lot of disk space)
        for chunk in pending:
            try:
                self._download_json(chunk)
                self._unpack_json(chunk)
                # Lists of one batch are stored in label order
                ordered = sorted(chunk, key=lambda item: item.label)
                for cve_list in ordered:
                    cve_list.load_json()
                    self.cverepo_store.store(cve_list)
                    cve_list.unload_json()
            finally:
                # The batch is cleaned whether or not it was processed successfully
                self.clean_repo(chunk)
    def store(self):
        """Sync all queued repositories.

        Repositories are processed in batches due to disk space and memory usage.
        The certificate cache written at the start is removed again even when the
        sync raises, and the repodata of each batch is cleaned whether or not the
        batch was processed successfully. Unused data in the repository store is
        cleaned up only after every batch went through without an exception.
        """
        self.logger.info("Checking %d repositories.", len(self.repositories))

        self._write_certificate_cache()
        try:
            # Download all repomd files first
            failed = self._download_repomds()
            if failed:
                self.logger.warning("%d repomd.xml files failed to download.",
                                    len(failed))
                failed_repos = [
                    repo for repo in self.repositories
                    if self._repo_download_failed(repo, failed)
                ]
                self.clean_repodata(failed_repos)

            self._read_repomds()
            # Filter all repositories without repomd attribute set (failed download, downloaded repomd is not newer)
            batches = BatchList()
            to_skip = []
            for repository in self.repositories:
                if repository.repomd:
                    batches.add_item(repository)
                else:
                    to_skip.append(repository)
            self.clean_repodata(to_skip)
            self.logger.info("%d repositories skipped.", len(to_skip))
            self.logger.info("Syncing %d repositories.",
                             sum(len(batch) for batch in batches))

            # Download and process repositories in batches (unpacked metadata files can consume lot of disk space)
            for batch in batches:
                try:
                    failed = self._download_metadata(batch)
                    if failed:
                        self.logger.warning("%d metadata files failed to download.",
                                            len(failed))
                        failed_repos = [
                            repo for repo in batch
                            if self._repo_download_failed(repo, failed)
                        ]
                        self.clean_repodata(failed_repos)
                        # Continue only with repositories whose metadata downloaded
                        batch = [repo for repo in batch if repo not in failed_repos]
                    self._unpack_metadata(batch)
                    for repository in batch:
                        repository.load_metadata()
                        self.repo_store.store(repository)
                        repository.unload_metadata()
                finally:
                    # Release the batch's files on disk even if processing failed
                    self.clean_repodata(batch)

            self.repo_store.cleanup_unused_data()
        finally:
            # Never leave the written certificate cache behind, not even on failure
            self._clean_certificate_cache()
Beispiel #4
0
class TestBatchList:
    """Tests for BatchList: items added one by one end up in a list of batches."""
    @pytest.fixture()
    def batchlist(self):
        """Create a fresh BatchList on ``self.blist`` for the requesting test."""
        self.blist = BatchList()

    def test_empty_batch(self, batchlist):
        """A BatchList without any added item has no batches."""
        assert not self.blist.batches

    # With an item limit of 50 per batch: 102 items -> 50/50/2, 150 -> 50/50/50, 157 -> 50/50/50/7.
    # Every batch except possibly the last one is expected to be completely filled, where "filled"
    # is bounded both by BATCH_MAX_SIZE items and by BATCH_MAX_FILESIZE of cumulative file size.
    @pytest.mark.parametrize("list_size", [102, 150, 157])
    @pytest.mark.parametrize("item_filesize", ITEM_FILESIZES)
    def test_batch_creation(self, batchlist, list_size, item_filesize):
        """Add ``list_size`` items of ``item_filesize`` each and check the size of every batch."""

        for item in range(list_size):
            self.blist.add_item(item, item_filesize)

        # Large items lower the number of items fitting into one batch below BATCH_MAX_SIZE
        items_by_filesize = int(BATCH_MAX_FILESIZE) // item_filesize
        per_batch = min(int(BATCH_MAX_SIZE), items_by_filesize)

        expected_batches = math.ceil(list_size / per_batch)
        remainder = list_size % per_batch
        assert len(self.blist.batches) == expected_batches

        final_index = expected_batches - 1
        for index in range(expected_batches):
            # Only the last batch may be partially filled, and only if items are left over
            partial_tail = index == final_index and remainder > 0
            expected_len = remainder if partial_tail else per_batch
            assert len(self.blist.batches[index]) == expected_len

    def test_invalid_batch_size(self, batchlist):
        """
        Run batch creation with items one unit larger than BATCH_MAX_FILESIZE.
        An AssertionError is expected because a single item exceeds the max batch file size.
        """
        oversized = int(BATCH_MAX_FILESIZE) + 1
        with pytest.raises(AssertionError):
            self.test_batch_creation(batchlist, 102, oversized)
    def store(self):
        """Sync all queued repositories, batch by batch, to limit disk space and memory usage."""
        self.logger.info("Checking %d repositories.", len(self.repositories))

        self._write_certificate_cache()

        # All repomd files are downloaded before any other metadata
        failed_repomds = self._download_repomds()
        if failed_repomds:
            FAILED_REPOMD.inc(len(failed_repomds))
            broken_repos = [
                repo for repo in self.repositories
                if self._repo_download_failed(repo, failed_repomds)
            ]
            self.logger.warning("%d repomd.xml files failed to download.", len(failed_repomds))
            self.clean_repodata(broken_repos)

        self._read_repomds()

        def md_size(repomd, data_type):
            """Return 'size' plus 'open-size' of one metadata type, or 0 if the type is not found."""
            try:
                entry = repomd.get_metadata(data_type)
                packed = int(entry.get('size', 0))
                # open-size is not present for uncompressed files
                unpacked = int(entry.get('open-size', '0'))
                return packed + unpacked
            except RepoMDTypeNotFound:
                return 0

        # Repositories without repomd attribute set (downloaded repomd is not newer) are not synced
        batches = BatchList()
        up_to_date = []
        for repository in self.repositories:
            repomd = repository.repomd
            if not repomd:
                up_to_date.append(repository)
                continue

            # If we use primary_db, we don't even download primary data xml
            primary_size = md_size(repomd, 'primary_db') or md_size(repomd, 'primary')
            extra_size = sum(md_size(repomd, kind) for kind in ('updateinfo', 'modules'))
            batches.add_item(repository, primary_size + extra_size)

        self.clean_repodata(up_to_date)
        self.logger.info("%d repositories are up to date.", len(up_to_date))
        total_repositories = batches.get_total_items()
        self.logger.info("%d repositories need to be synced.", total_repositories)

        # Download and process repositories in batches (unpacked metadata files can consume lot of disk space)
        completed_repositories = 0
        try:
            for batch in batches:
                self.logger.info("Syncing a batch of %d repositories", len(batch))
                active = batch
                try:
                    failed_metadata = self._download_metadata(active)
                    if failed_metadata:
                        self.logger.warning("%d metadata files failed to download.", len(failed_metadata))
                        broken_repos = [
                            repo for repo in active
                            if self._repo_download_failed(repo, failed_metadata)
                        ]
                        self.clean_repodata(broken_repos)
                        # Only repositories with complete downloads are processed further
                        active = [repo for repo in active if repo not in broken_repos]
                    self._unpack_metadata(active)
                    for repository in active:
                        repository.load_metadata()
                        completed_repositories += 1
                        identity = (repository.content_set, repository.basearch, repository.releasever)
                        label = ", ".join(part for part in identity if part)
                        self.logger.info("Syncing repository: %s [%s/%s]", label,
                                         completed_repositories, total_repositories)
                        self.repo_store.store(repository)
                        repository.unload_metadata()
                finally:
                    # Cleans whatever is still in the batch, also when an exception is raised
                    self.clean_repodata(active)
        finally:
            self.repo_store.cleanup_unused_data()
            self._clean_certificate_cache()
Beispiel #6
0
 def batchlist(self):
     """Set up batch list testing by storing a fresh BatchList on ``self.blist``."""
     self.blist = BatchList()
Beispiel #7
0
class TestBatchList:
    """ Tests of BatchList item counting, batch splitting and clearing. """
    batch_list = None
    DUMMY_ITEMS = [{"item": 1}, {"item": 2}, {"item": 3}]

    @pytest.fixture(autouse=True)
    def setup_batch_list(self):
        """ Autouse fixture: store a new BatchList on the test instance. """
        self.batch_list = BatchList()

    def test_empty(self):
        """ A new batch list reports zero items. """
        total = self.batch_list.get_total_items()
        assert total == 0

    def test_insert_few(self):
        """ Inserting a few items is reflected in the total item count. """
        for entry in self.DUMMY_ITEMS:
            self.batch_list.add_item(entry)

        total = self.batch_list.get_total_items()
        assert total == len(self.DUMMY_ITEMS)

    def test_insert_full_batch(self):
        """ Insert one item more than BATCH_MAX_SIZE, so that a second batch needs to be created. """
        capacity = int(BATCH_MAX_SIZE)
        item_count = capacity + 1

        for number in range(item_count):
            self.batch_list.add_item({"item": number})

        assert self.batch_list.get_total_items() == item_count
        assert len(self.batch_list.batches) == 2
        # First batch is full, the single overflowing item sits in the second one
        assert len(self.batch_list.batches[0]) == capacity
        assert len(self.batch_list.batches[1]) == 1

    def test_clear(self):
        """ Clearing a filled batch list leaves no batches behind. """
        for entry in self.DUMMY_ITEMS:
            self.batch_list.add_item(entry)

        self.batch_list.clear()
        assert self.batch_list.batches == []
Beispiel #8
0
 def setup_batch_list(self):
     """ Fixture for creating the batch list: stores a new BatchList on ``self.batch_list``. """
     self.batch_list = BatchList()
Beispiel #9
0
    def store(self):
        """Sync all queued repositories, batch by batch, to limit disk space and memory usage."""
        self.logger.info("Checking %d repositories.", len(self.repositories))

        self._write_certificate_cache()

        # All repomd files are downloaded before any other metadata
        failed_repomds = self._download_repomds()
        if failed_repomds:
            FAILED_REPOMD.inc(len(failed_repomds))
            broken_repos = [repo for repo in self.repositories
                            if self._repo_download_failed(repo, failed_repomds)]
            self.logger.warning("%d repomd.xml files failed to download.", len(failed_repomds))
            self.clean_repodata(broken_repos)

        self._read_repomds()

        # Repositories without repomd attribute set (downloaded repomd is not newer) are not synced
        batches = BatchList()
        up_to_date = []
        for repository in self.repositories:
            if not repository.repomd:
                up_to_date.append(repository)
                continue
            batches.add_item(repository)
        self.clean_repodata(up_to_date)
        self.logger.info("%d repositories are up to date.", len(up_to_date))

        total_repositories = batches.get_total_items()
        self.logger.info("%d repositories need to be synced.", total_repositories)

        # Download and process repositories in batches (unpacked metadata files can consume lot of disk space)
        completed_repositories = 0
        try:
            for batch in batches:
                active = batch
                try:
                    failed_metadata = self._download_metadata(active)
                    if failed_metadata:
                        self.logger.warning("%d metadata files failed to download.", len(failed_metadata))
                        broken_repos = [repo for repo in active
                                        if self._repo_download_failed(repo, failed_metadata)]
                        self.clean_repodata(broken_repos)
                        # Only repositories with complete downloads are processed further
                        active = [repo for repo in active if repo not in broken_repos]
                    self._unpack_metadata(active)
                    for repository in active:
                        repository.load_metadata()
                        completed_repositories += 1
                        identity = (repository.content_set, repository.basearch, repository.releasever)
                        label = ", ".join(part for part in identity if part)
                        self.logger.info("Syncing repository: %s [%s/%s]", label,
                                         completed_repositories, total_repositories)
                        self.repo_store.store(repository)
                        repository.unload_metadata()
                finally:
                    # Cleans whatever is still in the batch, also when an exception is raised
                    self.clean_repodata(active)
        finally:
            self.repo_store.cleanup_unused_data()
            self._clean_certificate_cache()