def store(self):
    """Sync all queued repositories. Process repositories in batches due to disk space and memory usage."""
    self.logger.log("Checking %d repositories." % len(self.repositories))

    # Fetch current list of repositories from DB
    self.db_repositories = self.repo_store.list_repositories()

    # Download all repomd files first
    failed = self._download_repomds()
    self.logger.log("%d repomd.xml files failed to download." % len(failed))
    self._read_repomds(failed)

    # Filter out all repositories without the repomd attribute set
    # (failed download, or the downloaded repomd is not newer)
    batches = BatchList()
    to_skip = []
    for repository in self.repositories:
        if repository.repomd:
            batches.add_item(repository)
        else:
            to_skip.append(repository)
    self.clean_repodata(to_skip)
    self.logger.log("%d repositories skipped." % len(to_skip))
    self.logger.log("Syncing %d repositories." % sum(len(batch) for batch in batches))

    # Download and process repositories in batches
    # (unpacked metadata files can consume a lot of disk space)
    for batch in batches:
        self._download_metadata(batch)
        self._unpack_metadata(batch)
        for repository in batch:
            repository.load_metadata()
            self.repo_store.store(repository)
            repository.unload_metadata()
        self.clean_repodata(batch)
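# Every store() variant here relies on a clean_repodata() helper that is not
# shown. A minimal sketch of the assumed behavior follows: delete each
# repository's temporary download directory and drop the repository from the
# queue. This is an illustrative reconstruction, not the project's actual
# implementation; `tmp_directory` is an assumed attribute name.
import shutil

def clean_repodata(self, repositories):
    """Remove downloaded repository data and unqueue the repositories."""
    for repository in repositories:
        if repository.tmp_directory:
            # Best-effort removal of the unpacked metadata on disk
            shutil.rmtree(repository.tmp_directory, ignore_errors=True)
            repository.tmp_directory = None
        self.repositories.remove(repository)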
def store(self):
    """Sync all queued CVE lists. Runs in batches due to disk space and memory usage."""
    self.logger.info("Checking %d CVE lists.", len(self.repos))

    # Download all meta files first
    failed = self._download_meta()
    if failed:
        FAILED_NIST.inc()
        self.logger.warning("%d meta files failed to download.", len(failed))
    self._read_meta(failed)

    # Filter out failed / unchanged lists
    batches = BatchList()
    to_skip = []
    for repo in self.repos:
        if repo.meta:
            batches.add_item(repo)
        else:
            to_skip.append(repo)
    self.clean_repo(to_skip)
    self.logger.info("%d CVE lists skipped.", len(to_skip))
    self.logger.info("Syncing %d CVE lists.", sum(len(batch) for batch in batches))

    # Download and process CVE lists in batches
    # (unpacked JSON files can consume a lot of disk space)
    for batch in batches:
        try:
            self._download_json(batch)
            self._unpack_json(batch)
            for repo in sorted(batch, key=lambda repo: repo.label):
                repo.load_json()
                self.cverepo_store.store(repo)
                repo.unload_json()
        finally:
            self.clean_repo(batch)
def store(self):
    """Sync all queued repositories. Process repositories in batches due to disk space and memory usage."""
    self.logger.info("Checking %d repositories.", len(self.repositories))
    self._write_certificate_cache()

    # Download all repomd files first
    failed = self._download_repomds()
    if failed:
        self.logger.warning("%d repomd.xml files failed to download.", len(failed))
        failed_repos = [repo for repo in self.repositories if self._repo_download_failed(repo, failed)]
        self.clean_repodata(failed_repos)
    self._read_repomds()

    # Filter out all repositories without the repomd attribute set
    # (failed download, or the downloaded repomd is not newer)
    batches = BatchList()
    to_skip = []
    for repository in self.repositories:
        if repository.repomd:
            batches.add_item(repository)
        else:
            to_skip.append(repository)
    self.clean_repodata(to_skip)
    self.logger.info("%d repositories skipped.", len(to_skip))
    self.logger.info("Syncing %d repositories.", sum(len(batch) for batch in batches))

    # Download and process repositories in batches
    # (unpacked metadata files can consume a lot of disk space)
    for batch in batches:
        failed = self._download_metadata(batch)
        if failed:
            self.logger.warning("%d metadata files failed to download.", len(failed))
            failed_repos = [repo for repo in batch if self._repo_download_failed(repo, failed)]
            self.clean_repodata(failed_repos)
            batch = [repo for repo in batch if repo not in failed_repos]
        self._unpack_metadata(batch)
        for repository in batch:
            repository.load_metadata()
            self.repo_store.store(repository)
            repository.unload_metadata()
        self.clean_repodata(batch)

    self.repo_store.cleanup_unused_data()
    self._clean_certificate_cache()
class TestBatchList:
    """TestBatchList class. Tests creating a list of batches."""

    @pytest.fixture()
    def batchlist(self):
        """Setup for batchlist testing."""
        self.blist = BatchList()

    def test_empty_batch(self, batchlist):
        """Test empty batchlist."""
        assert not self.blist.batches

    # Assuming the default batch size is 50: 102 items -> 3 batches (50/50/2),
    # 150 -> 3 batches (50/50/50), 157 -> 4 batches (50/50/50/7).
    # Walk through the batches, making sure each batch other than the last is
    # at most BATCH_MAX_SIZE long and each batch has a cumulative file size
    # below BATCH_MAX_FILESIZE.
    @pytest.mark.parametrize("list_size", [102, 150, 157])
    @pytest.mark.parametrize("item_filesize", ITEM_FILESIZES)
    def test_batch_creation(self, batchlist, list_size, item_filesize):
        """Test creation of batch list."""
        for i in range(list_size):
            self.blist.add_item(i, item_filesize)
        # Batch size is variable; if items are too large, a batch may contain
        # fewer than BATCH_MAX_SIZE items
        batch_size = min(int(BATCH_MAX_SIZE), int(BATCH_MAX_FILESIZE) // item_filesize)
        total_batches = math.ceil(list_size / batch_size)
        last_batch_size = list_size % batch_size
        assert len(self.blist.batches) == total_batches
        for curr_batch in range(total_batches):
            if curr_batch == (total_batches - 1) and last_batch_size > 0:
                expected_num_in_batch = last_batch_size
            else:
                expected_num_in_batch = batch_size
            assert len(self.blist.batches[curr_batch]) == expected_num_in_batch

    def test_invalid_batch_size(self, batchlist):
        """Test creation of an invalid batch list.

        Should fail because a single item is larger than the maximum batch file size.
        """
        with pytest.raises(AssertionError):
            self.test_batch_creation(batchlist, 102, int(BATCH_MAX_FILESIZE) + 1)
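# The tests above exercise a BatchList that caps batches both by item count
# (BATCH_MAX_SIZE) and by cumulative file size (BATCH_MAX_FILESIZE). Below is
# a minimal sketch consistent with the asserted behavior; the limits are
# assumed to come from environment variables (the 50-item default matches the
# comment in the test above, the filesize default is an assumption), and the
# real class may differ in detail.
import os

BATCH_MAX_SIZE = os.getenv("BATCH_MAX_SIZE", "50")
BATCH_MAX_FILESIZE = os.getenv("BATCH_MAX_FILESIZE", str(2 * 1024 ** 3))

class BatchList:
    """List of lists, limiting item count and total file size per inner batch."""

    def __init__(self):
        self.batches = []
        self.last_batch_filesize = 0

    def add_item(self, item, file_size=0):
        """Add an item, opening a new batch when either limit would be exceeded."""
        # A single item must fit into one batch
        assert file_size <= int(BATCH_MAX_FILESIZE)
        if (not self.batches
                or len(self.batches[-1]) >= int(BATCH_MAX_SIZE)
                or self.last_batch_filesize + file_size > int(BATCH_MAX_FILESIZE)):
            self.batches.append([])
            self.last_batch_filesize = 0
        self.batches[-1].append(item)
        self.last_batch_filesize += file_size

    def get_total_items(self):
        """Return the number of items across all batches."""
        return sum(len(batch) for batch in self.batches)

    def clear(self):
        """Drop all batches."""
        self.batches = []
        self.last_batch_filesize = 0

    def __iter__(self):
        # store() iterates over whole batches, not individual items
        return iter(self.batches)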
def store(self):
    """Sync all queued repositories. Process repositories in batches due to disk space and memory usage."""
    self.logger.info("Checking %d repositories.", len(self.repositories))
    self._write_certificate_cache()

    # Download all repomd files first
    failed = self._download_repomds()
    if failed:
        FAILED_REPOMD.inc(len(failed))
        failed_repos = [repo for repo in self.repositories if self._repo_download_failed(repo, failed)]
        self.logger.warning("%d repomd.xml files failed to download.", len(failed))
        self.clean_repodata(failed_repos)
    self._read_repomds()

    # Filter out all repositories without the repomd attribute set
    # (the downloaded repomd is not newer)
    batches = BatchList()
    up_to_date = []

    def md_size(repomd, data_type):
        try:
            mdata = repomd.get_metadata(data_type)
            # open-size is not present for uncompressed files
            return int(mdata.get('size', 0)) + int(mdata.get('open-size', '0'))
        except RepoMDTypeNotFound:
            return 0

    for repository in self.repositories:
        if repository.repomd:
            repo_size = md_size(repository.repomd, 'primary_db')
            # If we use primary_db, we don't even download the primary data XML
            if repo_size == 0:
                repo_size += md_size(repository.repomd, 'primary')
            repo_size += md_size(repository.repomd, 'updateinfo')
            repo_size += md_size(repository.repomd, 'modules')
            batches.add_item(repository, repo_size)
        else:
            up_to_date.append(repository)

    self.clean_repodata(up_to_date)
    self.logger.info("%d repositories are up to date.", len(up_to_date))
    total_repositories = batches.get_total_items()
    completed_repositories = 0
    self.logger.info("%d repositories need to be synced.", total_repositories)

    # Download and process repositories in batches
    # (unpacked metadata files can consume a lot of disk space)
    try:
        for batch in batches:
            self.logger.info("Syncing a batch of %d repositories", len(batch))
            try:
                failed = self._download_metadata(batch)
                if failed:
                    self.logger.warning("%d metadata files failed to download.", len(failed))
                    failed_repos = [repo for repo in batch if self._repo_download_failed(repo, failed)]
                    self.clean_repodata(failed_repos)
                    batch = [repo for repo in batch if repo not in failed_repos]
                self._unpack_metadata(batch)
                for repository in batch:
                    repository.load_metadata()
                    completed_repositories += 1
                    self.logger.info("Syncing repository: %s [%s/%s]",
                                     ", ".join(filter(None, (repository.content_set,
                                                             repository.basearch,
                                                             repository.releasever))),
                                     completed_repositories, total_repositories)
                    self.repo_store.store(repository)
                    repository.unload_metadata()
            finally:
                self.clean_repodata(batch)
    finally:
        self.repo_store.cleanup_unused_data()
        self._clean_certificate_cache()
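# For reference, the repomd.xml metadata shape md_size() above relies on:
# each <data type="..."> entry carries a <size> (compressed bytes) and, for
# compressed files, an <open-size> (unpacked bytes). Illustrative values
# (the exact return shape of get_metadata() is assumed here):
#
#   repomd.get_metadata('primary')
#   # -> {'location': 'repodata/...-primary.xml.gz',
#   #     'size': '1048576', 'open-size': '8388608', ...}
#   md_size(repomd, 'primary')  # -> 1048576 + 8388608 == 9437184
#
# Summing both values estimates the peak disk usage of a repository during
# the download-then-unpack step, which is what the batch size limit guards.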
class TestBatchList:
    """Batch list tests group."""

    batch_list = None
    DUMMY_ITEMS = [{"item": 1}, {"item": 2}, {"item": 3}]

    @pytest.fixture(autouse=True)
    def setup_batch_list(self):
        """Fixture for creating batch list."""
        self.batch_list = BatchList()

    def test_empty(self):
        """Test for empty batch list."""
        assert self.batch_list.get_total_items() == 0

    def test_insert_few(self):
        """Test for insertion of a few items."""
        for item in self.DUMMY_ITEMS:
            self.batch_list.add_item(item)
        assert self.batch_list.get_total_items() == len(self.DUMMY_ITEMS)

    def test_insert_full_batch(self):
        """Test for insertion of multiple items, until a new batch needs to be created."""
        max_size = int(BATCH_MAX_SIZE)
        for count in range(0, max_size + 1):
            self.batch_list.add_item({"item": count})
        assert self.batch_list.get_total_items() == max_size + 1
        assert len(self.batch_list.batches) == 2
        assert len(self.batch_list.batches[0]) == max_size
        assert len(self.batch_list.batches[1]) == 1

    def test_clear(self):
        """Test for clearing the list."""
        for item in self.DUMMY_ITEMS:
            self.batch_list.add_item(item)
        self.batch_list.clear()
        assert self.batch_list.batches == []
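# Quick, self-contained check of the behavior asserted above, using the
# BatchList sketch shown after the first TestBatchList (assumes the default
# BATCH_MAX_SIZE of 50):
blist = BatchList()
for i in range(102):
    blist.add_item(i)
assert [len(batch) for batch in blist.batches] == [50, 50, 2]
assert blist.get_total_items() == 102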
def store(self):
    """Sync all queued repositories. Process repositories in batches due to disk space and memory usage."""
    self.logger.info("Checking %d repositories.", len(self.repositories))
    self._write_certificate_cache()

    # Download all repomd files first
    failed = self._download_repomds()
    if failed:
        FAILED_REPOMD.inc(len(failed))
        failed_repos = [repo for repo in self.repositories if self._repo_download_failed(repo, failed)]
        self.logger.warning("%d repomd.xml files failed to download.", len(failed))
        self.clean_repodata(failed_repos)
    self._read_repomds()

    # Filter out all repositories without the repomd attribute set
    # (the downloaded repomd is not newer)
    batches = BatchList()
    up_to_date = []
    for repository in self.repositories:
        if repository.repomd:
            batches.add_item(repository)
        else:
            up_to_date.append(repository)
    self.clean_repodata(up_to_date)
    self.logger.info("%d repositories are up to date.", len(up_to_date))
    total_repositories = batches.get_total_items()
    completed_repositories = 0
    self.logger.info("%d repositories need to be synced.", total_repositories)

    # Download and process repositories in batches
    # (unpacked metadata files can consume a lot of disk space)
    try:
        for batch in batches:
            try:
                failed = self._download_metadata(batch)
                if failed:
                    self.logger.warning("%d metadata files failed to download.", len(failed))
                    failed_repos = [repo for repo in batch if self._repo_download_failed(repo, failed)]
                    self.clean_repodata(failed_repos)
                    batch = [repo for repo in batch if repo not in failed_repos]
                self._unpack_metadata(batch)
                for repository in batch:
                    repository.load_metadata()
                    completed_repositories += 1
                    self.logger.info("Syncing repository: %s [%s/%s]",
                                     ", ".join(filter(None, (repository.content_set,
                                                             repository.basearch,
                                                             repository.releasever))),
                                     completed_repositories, total_repositories)
                    self.repo_store.store(repository)
                    repository.unload_metadata()
            finally:
                self.clean_repodata(batch)
    finally:
        self.repo_store.cleanup_unused_data()
        self._clean_certificate_cache()
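# The last three repository store() variants all call a _repo_download_failed()
# helper that is not shown. A minimal sketch of the assumed contract follows:
# `failed` maps a download target (URL or local path) to its error, and each
# repository can enumerate the targets it tried to download. Both
# `download_targets()` and the mapping shape are assumptions for illustration.
def _repo_download_failed(self, repo, failed):
    """Return True if any of this repository's downloads failed."""
    return any(target in failed for target in repo.download_targets())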