async def run(self):
    """
    Build and emit `DeclarativeContent` from the "universe" metadata of the remote.

    If a cookbook specifier is set in the remote, only the cookbooks named by this
    specifier are emitted.
    """
    with ProgressBar(message="Downloading Metadata", total=1) as download_pb:
        universe_url = urljoin(self.remote.url + "/", "universe")
        result = await self.remote.get_downloader(url=universe_url).run()
        download_pb.increment()

    cookbook_names = self.remote.specifier_cookbook_names()

    with ProgressBar(message="Parsing Metadata") as parse_pb:
        for entry in Universe(result.path).read():
            # An empty/unset specifier means "sync everything".
            wanted = not cookbook_names or entry.name in cookbook_names
            if not wanted:
                continue
            cookbook = CookbookPackageContent(
                name=entry.name,
                version=entry.version,
                dependencies=entry.dependencies,
            )
            d_artifact = DeclarativeArtifact(
                artifact=Artifact(),
                url=entry.download_url,
                relative_path=cookbook.relative_path(),
                remote=self.remote,
                deferred_download=not self.download_artifacts,
            )
            d_content = DeclarativeContent(content=cookbook, d_artifacts=[d_artifact])
            parse_pb.increment()
            await self.put(d_content)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the metadata found at the remote url.

    The metadata file is downloaded from `self.remote.url`; every entry read from it is
    turned into a content unit with a single artifact located relative to the directory
    of the metadata file.
    """
    with ProgressBar(message='Downloading Metadata') as download_pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = os.path.dirname(parsed_url.path)
        result = await self.remote.get_downloader(url=self.remote.url).run()
        download_pb.increment()

    with ProgressBar(message='Parsing Metadata') as parse_pb:
        for entry in self.read_my_metadata_file_somehow(result.path):
            artifact_path = os.path.join(root_dir, entry['picture'])
            artifact_url = urlunparse(parsed_url._replace(path=artifact_path))
            # Neither the content unit nor the artifact is saved here; both exist in
            # memory only.
            unit = Animal(**entry)
            artifact = Artifact()
            d_artifact = DeclarativeArtifact(
                artifact, artifact_url, entry['picture'], self.remote)
            d_content = DeclarativeContent(content=unit, d_artifacts=[d_artifact])
            parse_pb.increment()
            await self.put(d_content)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    Artifact downloads are deferred unless the remote's policy is `Remote.IMMEDIATE`.
    """
    # Interpret download policy
    deferred_download = (self.remote.policy != Remote.IMMEDIATE)

    with ProgressBar(message='Downloading Metadata') as download_pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = os.path.dirname(parsed_url.path)
        result = await self.remote.get_downloader(url=self.remote.url).run()
        download_pb.increment()

    with ProgressBar(message='Parsing Metadata') as parse_pb:
        for entry in Manifest(result.path).read():
            # Artifacts live next to the manifest on the remote.
            entry_path = os.path.join(root_dir, entry.relative_path)
            entry_url = urlunparse(parsed_url._replace(path=entry_path))
            content = FileContent(relative_path=entry.relative_path, digest=entry.digest)
            d_artifact = DeclarativeArtifact(
                artifact=Artifact(size=entry.size, sha256=entry.digest),
                url=entry_url,
                relative_path=entry.relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            d_content = DeclarativeContent(content=content, d_artifacts=[d_artifact])
            parse_pb.increment()
            await self.put(d_content)
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input
            queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to. A final
            `None` is put once all entries have been emitted.
    """
    with ProgressBar(message='Downloading Metadata') as download_pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = os.path.dirname(parsed_url.path)
        result = await self.remote.get_downloader(self.remote.url).run()
        download_pb.increment()

    with ProgressBar(message='Parsing Metadata') as parse_pb:
        for entry in Manifest(result.path).read():
            # Artifacts live next to the manifest on the remote.
            entry_path = os.path.join(root_dir, entry.relative_path)
            entry_url = urlunparse(parsed_url._replace(path=entry_path))
            content = FileContent(relative_path=entry.relative_path, digest=entry.digest)
            d_artifact = DeclarativeArtifact(
                Artifact(size=entry.size, sha256=entry.digest),
                entry_url,
                entry.relative_path,
                self.remote,
            )
            d_content = DeclarativeContent(content=content, d_artifacts=[d_artifact])
            parse_pb.increment()
            await out_q.put(d_content)

    # Signal the end of the stream to the next stage.
    await out_q.put(None)
async def run(self):
    """
    Parse Release content units.

    Every `Release` unit passing through is updated with the information read from its
    first artifact (a deb822 release file). All units, `Release` or not, are passed on.
    """
    with ProgressBar(message="Update Release units") as pb:
        async for d_content in self.items():
            release = d_content.content
            if isinstance(release, Release):
                release_artifact = d_content.d_artifacts[0].artifact
                release.sha256 = release_artifact.sha256
                release_dict = deb822.Release(release_artifact.file)
                release.codename = release_dict["Codename"]
                release.suite = release_dict["Suite"]
                # TODO split of extra stuff e.g. : 'updates/main' -> 'main'
                release.components = _filter_ssl(
                    release_dict["Components"], self.components)
                release.architectures = _filter_ssl(
                    release_dict["Architectures"], self.architectures)
                log.debug("Codename: {}".format(release.codename))
                log.debug("Components: {}".format(release.components))
                log.debug("Architectures: {}".format(release.architectures))
                pb.increment()
            await self.put(d_content)
async def run(self):
    """
    Parse PackageIndex content units.

    Ensure that an uncompressed artifact (one whose sha256 matches the content's sha256)
    is available for every `PackageIndex`. All units are passed on afterwards.

    Raises:
        NoPackageIndexFile: If a `PackageIndex` unit has no artifacts at all.
    """
    with ProgressBar(message='Update PackageIndex units') as pb:
        async for d_content in self.items():
            if isinstance(d_content.content, PackageIndex):
                d_artifacts = d_content.d_artifacts
                if not d_artifacts:
                    raise NoPackageIndexFile()

                content = d_content.content
                has_main_artifact = any(
                    d_artifact.artifact.sha256 == content.sha256
                    for d_artifact in d_artifacts
                )
                if not has_main_artifact:
                    # No main_artifact found uncompress one
                    first = d_content.d_artifacts[0]
                    filename = _uncompress_artifact(first.artifact)
                    da = DeclarativeArtifact(
                        Artifact(sha256=content.sha256),
                        filename,
                        content.relative_path,
                        d_content.d_artifacts[0].remote,
                    )
                    d_content.d_artifacts.append(da)
                    await da.download()
                    da.artifact.save()
                    log.info("*** Expected: {} *** Uncompressed: {} ***".format(
                        content.sha256, da.artifact.sha256))
                pb.increment()
            await self.put(d_content)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the ansible metadata.

    One `Role` content unit with a single tarball artifact is emitted for every version
    listed in the summary fields of each role's metadata.
    """
    with ProgressBar(message='Parsing Role Metadata') as pb:
        async for metadata in self._fetch_roles():
            for version in metadata['summary_fields']['versions']:
                tarball_url = GITHUB_URL % (
                    metadata['github_user'],
                    metadata['github_repo'],
                    version['name'],
                )
                role = Role(
                    version=version['name'],
                    name=metadata['name'],
                    namespace=metadata['namespace'],
                )
                path_parts = (metadata['namespace'], metadata['name'], version['name'])
                relative_path = "%s/%s/%s.tar.gz" % path_parts
                d_artifact = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=tarball_url,
                    relative_path=relative_path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                d_content = DeclarativeContent(content=role, d_artifacts=[d_artifact])
                pb.increment()
                await self.put(d_content)
async def migrate_content(content_models):
    """
    A coroutine to initiate content migration for each plugin.

    All pre-migrators are run concurrently first; once they are finished, the content
    migrators are run concurrently.

    `asyncio.gather` is used instead of `asyncio.wait` because it accepts bare coroutines
    (`asyncio.wait` rejects them on Python 3.11+), is a no-op for an empty list of content
    models (`asyncio.wait` raises `ValueError`), and re-raises the first exception of a
    migrator instead of leaving it unretrieved.

    Args:
        content_models: List of Pulp 2 content models to migrate data for
    """
    pre_migrators = [pre_migrate_content(content_model) for content_model in content_models]

    _logger.debug('Pre-migrating Pulp 2 content')
    await asyncio.gather(*pre_migrators)

    with ProgressBar(message='Migrating content to Pulp 3', total=0) as pb:
        # schedule content migration into Pulp 3 using pre-migrated Pulp 2 content
        content_migrators = []
        for content_model in content_models:
            content_migrators.append(
                content_model.pulp_2to3_detail.migrate_content_to_pulp3())

            # only used for progress bar counters
            content_type = content_model.pulp_2to3_detail.type
            pulp2content_qs = Pulp2Content.objects.filter(
                pulp2_content_type_id=content_type, pulp3_content=None)
            pb.total += pulp2content_qs.count()
        pb.save()

        await asyncio.gather(*content_migrators)

        # The migrators do not report progress to this bar; mark it complete at the end.
        pb.done = pb.total
async def migrate_repositories():
    """
    A coroutine to migrate pre-migrated repositories.

    A Pulp 3 repository is created for every pre-migrated Pulp 2 repository that has no
    Pulp 3 repository version yet. Repositories that already exist are not counted in the
    progress report.
    """
    max_name_length = 255
    with ProgressBar(message='Creating repositories in Pulp 3', total=0) as pb:
        pulp2repos_qs = Pulp2Repository.objects.filter(pulp3_repository_version=None)
        pb.total += pulp2repos_qs.count()
        pb.save()

        for pulp2repo in pulp2repos_qs:
            pulp3_repo_name = pulp2repo.pulp2_repo_id
            if len(pulp3_repo_name) > max_name_length:
                # if pulp2 repo_id is too long, its hash is included in pulp3 repo name
                digest = hashlib.sha256(pulp3_repo_name.encode()).hexdigest()
                pulp3_repo_name = '{}-{}'.format(pulp3_repo_name[:190], digest)

            _repo, created = Repository.objects.get_or_create(
                name=pulp3_repo_name,
                description=pulp2repo.pulp2_description,
            )
            if not created:
                # Nothing was created for this one, so take it out of the expected total.
                pb.total -= 1
                pb.save()
            else:
                pb.increment()
def full_sync(self, delta):
    """
    Synchronize the repository with the remote repository and download artifacts.

    Failed downloads are recorded as non-fatal errors on the task; successfully
    downloaded content is created and associated with the repository.

    Args:
        delta (namedtuple): Set of unit keys for units to be added to the repository. Set
            of unit keys for units that should be removed from the repository. Only the
            additions are used in this method.
    """
    description = _("Dowloading artifacts and adding content to the repository.")
    current_task = Task()

    with ProgressBar(message=description, total=len(delta.additions)) as bar:
        with Batch(self.next_download(delta.additions)) as batch:
            for plan in batch():
                try:
                    plan.result()
                except DownloadError as error:
                    current_task.append_non_fatal_error(error)
                    continue

                download = plan.download
                content = self.content_dict.pop(download.url)
                monitor_dict = self.monitors.pop(download.url).facts()
                monitor_dict.update({'path': download.writer.path})
                self._create_and_associate_content(
                    content, {download.attachment: monitor_dict})
                bar.increment()
async def __call__(self, in_q, out_q):
    """
    The coroutine for this stage.

    Every queryset received is removed from `self.new_version` and then passed on. A
    `None` on `in_q` ends the stage, which in turn puts `None` on `out_q`.

    Args:
        in_q (:class:`asyncio.Queue`): Each item is a
            :class:`django.db.models.query.QuerySet` of
            :class:`~pulpcore.plugin.models.Content` subclass that are already associated
            but not included in the stream of items from `in_q`. One
            :class:`django.db.models.query.QuerySet` is put for each
            :class:`~pulpcore.plugin.models.Content` type.
        out_q (:class:`asyncio.Queue`): Each item is a
            :class:`django.db.models.query.QuerySet` of
            :class:`~pulpcore.plugin.models.Content` subclass that were unassociated. One
            :class:`django.db.models.query.QuerySet` is put for each
            :class:`~pulpcore.plugin.models.Content` type.

    Returns:
        The coroutine for this stage.
    """
    with ProgressBar(message='Un-Associating Content') as pb:
        queryset = await in_q.get()
        while queryset is not None:
            self.new_version.remove_content(queryset)
            pb.done = pb.done + queryset.count()
            pb.save()
            await out_q.put(queryset)
            queryset = await in_q.get()
    await out_q.put(None)
async def run(self):
    """
    Schedules multiple coroutines to migrate pre-migrated content to Pulp 3

    The not yet migrated content of `self.model.type` is split into at most `max_coro`
    batches and one `migrate_to_pulp3` coroutine is run per batch.

    `asyncio.gather` is used to run the batches: unlike `asyncio.wait` it accepts bare
    coroutines (which `asyncio.wait` rejects on Python 3.11+) and it re-raises the first
    exception of a batch instead of leaving it unretrieved.
    """
    content_type = self.model.type
    pulp2content_qs = Pulp2Content.objects.filter(
        pulp2_content_type_id=content_type, pulp3_content=None)
    total_pulp2content = pulp2content_qs.count()

    # determine the batch size if we can have up to 36 coroutines and the number of
    # batches (or coroutines)
    max_coro = 36
    batch_size = max(1, math.ceil(total_pulp2content / max_coro))
    batch_count = math.ceil(total_pulp2content / batch_size)

    with ProgressBar(message='Migrating {} content to Pulp 3'.format(content_type.upper()),
                     total=total_pulp2content) as pb:
        # schedule content migration
        migrators = [
            self.migrate_to_pulp3(
                pulp2content_qs[batch_idx * batch_size:(batch_idx + 1) * batch_size],
                pb=pb,
            )
            for batch_idx in range(batch_count)
        ]

        # gather() with no awaitables completes immediately, so no emptiness check needed.
        await asyncio.gather(*migrators)
def associate_existing_content(self, content_q):
    """
    Associates existing content to the importer's repository

    Args:
        content_q (queryset): Queryset that will return content that needs to be
            associated with the importer's repository.

    Returns:
        Set of natural keys representing each piece of content associated with the
        repository.
    """
    description = _("Associating units already in Pulp with the repository")
    associated_keys = set()
    with ProgressBar(message=description, total=content_q.count()) as bar:
        for content in ExampleContent.objects.paginated_qs_results(content_q):
            RepositoryContent(repository=self.repository, content=content).save()
            bar.increment()
            # Record the key so the caller can remove it from the delta.
            associated_keys.add(Key(path=content.path, digest=content.digest))
    return associated_keys
async def run(self):
    """
    The coroutine for this stage.

    Content arriving in batches is added to `self.new_version` unless it is already part
    of it. Content of `self.new_version` that never showed up in any batch is emitted as
    a single queryset at the end.

    Returns:
        The coroutine for this stage.
    """
    with ProgressBar(message='Associating Content') as pb:
        # Start with everything in the version; whatever is still left at the end was not
        # seen in the stream.
        unseen_pks = set(self.new_version.content.values_list('pk', flat=True))

        async for batch in self.batches():
            new_pks = set()
            for d_content in batch:
                pk = d_content.content.pk
                if pk in unseen_pks:
                    unseen_pks.remove(pk)
                else:
                    new_pks.add(pk)

            if not new_pks:
                continue
            self.new_version.add_content(Content.objects.filter(pk__in=new_pks))
            pb.done = pb.done + len(new_pks)
            pb.save()

        if unseen_pks:
            await self.put(Content.objects.filter(pk__in=unseen_pks))
def deferred_sync(self, delta):
    """
    Synchronize the repository with the remote repository without downloading artifacts.

    Args:
        delta (namedtuple): Set of unit keys for units to be added to the repository. Set
            of unit keys for units that should be removed from the repository. Only the
            additions are used in this method.
    """
    additions = delta.additions
    progress_bar = ProgressBar(
        message=_("Adding file content to the repository without downloading artifacts."),
        total=len(additions),
    )
    with progress_bar:
        for remote_artifact in self.next_remote_artifact(additions):
            content = self.content_dict.pop(remote_artifact.url)
            # No download result is attached to the remote artifact.
            self._create_and_associate_content(content, {remote_artifact: None})
            progress_bar.increment()
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the remote metadata.

    Fetch and parse the remote metadata, use the Project Specifiers on the Remote to
    determine which Python packages should be synced.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input
            queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to. A final
            `None` is put once all packages have been emitted.
    """
    specifiers = ProjectSpecifier.objects.filter(remote=self.remote)

    with ProgressBar(message='Fetching Project Metadata') as pb:
        # Specifiers for the same project are grouped, so the project metadata is fetched
        # only once and shared between them.
        grouped = groupby_unsorted(specifiers, key=lambda x: x.name)
        for name, project_specifiers in grouped:
            pb.increment()
            # Fetch the metadata from PyPI
            try:
                metadata = await self.get_project_metadata(name)
            except ClientResponseError as e:
                # Project doesn't exist, log a message and move on
                log.info(
                    _("HTTP 404 'Not Found' for url '{url}'\n"
                      "Does project '{name}' exist on the remote repository?"
                      ).format(url=e.request_info.url, name=name))
                continue

            # Split the specifiers of this project into includes and excludes.
            includes = []
            excludes = []
            for specifier in list(project_specifiers):
                if specifier.exclude:
                    excludes.append(specifier)
                else:
                    includes.append(specifier)

            # Determine which packages from the project match the criteria
            packages = await self.get_relevant_packages(
                metadata=metadata,
                includes=includes,
                excludes=excludes,
                prereleases=self.remote.prereleases,
            )

            # For each package, create Declarative objects to pass into the next stage
            for entry in packages:
                # 'url' and 'sha256_digest' are removed from the entry before the rest
                # of it is used to build the content unit.
                url = entry.pop('url')
                artifact = Artifact(sha256=entry.pop('sha256_digest'))
                package = PythonPackageContent(**entry)
                d_artifact = DeclarativeArtifact(
                    artifact, url, entry['filename'], self.remote)
                d_content = DeclarativeContent(content=package, d_artifacts=[d_artifact])
                await out_q.put(d_content)

    await out_q.put(None)
async def pre_migrate_all_without_content(plan):
    """
    Pre-migrate repositories, relations to their contents, importers and distributors.

    NOTE: MongoDB and Django handle datetime fields differently. MongoDB doesn't care
    about timezones and provides "naive" time, while Django is complaining about time
    without a timezone. The problem is that naive time != time with specified timezone,
    that's why all the time for MongoDB comparisons should be naive and all the time for
    Django/PostgreSQL should be timezone aware.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    repos = plan.get_repositories()
    importers = plan.get_importers()
    distributors = plan.get_distributors()

    _logger.debug('Pre-migrating Pulp 2 repositories')

    # the latest time we have in the migration tool in Pulp2Repository table
    zero_datetime = timezone.make_aware(datetime(1970, 1, 1), timezone.utc)
    added_agg = Pulp2Repository.objects.aggregate(Max('pulp2_last_unit_added'))
    last_added = added_agg['pulp2_last_unit_added__max'] or zero_datetime
    removed_agg = Pulp2Repository.objects.aggregate(Max('pulp2_last_unit_removed'))
    last_removed = removed_agg['pulp2_last_unit_removed__max'] or zero_datetime
    last_updated = max(last_added, last_removed)
    last_updated_naive = timezone.make_naive(last_updated, timezone=timezone.utc)

    message = 'Pre-migrating Pulp 2 repositories, importers, distributors'
    with ProgressBar(message=message) as pb:
        # we pre-migrate:
        #  - empty repos (last_unit_added is not set)
        #  - repos which were updated since last migration
        #    (last_unit_added/removed >= last_updated)
        never_filled = mongo_Q(last_unit_added__exists=False)
        added_since = mongo_Q(last_unit_added__gte=last_updated_naive)
        removed_since = mongo_Q(last_unit_removed__gte=last_updated_naive)
        mongo_repo_q = (never_filled | added_since | removed_since)

        # in case only certain repositories are specified in the migration plan
        if repos:
            mongo_repo_q &= mongo_Q(repo_id__in=repos)

        mongo_repo_qs = Repository.objects(mongo_repo_q)
        pb.total = mongo_repo_qs.count()
        pb.save()

        needed_fields = ('id', 'repo_id', 'last_unit_added', 'last_unit_removed')
        for repo_data in mongo_repo_qs.only(*needed_fields):
            # Everything belonging to one repository is pre-migrated in one transaction.
            with transaction.atomic():
                repo = await pre_migrate_repo(repo_data)
                await pre_migrate_importer(repo, importers)
                await pre_migrate_distributor(repo, distributors)
                await pre_migrate_repocontent(repo)
            pb.increment()
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the Spec data.

    Each gem is emitted with two artifacts: the gem file itself and its compressed
    gemspec.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input
            queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to. A final
            `None` is put once all gems have been emitted.
    """
    with ProgressBar(message='Downloading Metadata') as download_pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = parsed_url.path
        specs_url = urlunparse(
            parsed_url._replace(path=os.path.join(root_dir, 'specs.4.8.gz')))
        result = await self.remote.get_downloader(url=specs_url).run()
        download_pb.increment()

    def remote_url(relative):
        # Build the absolute url of a path relative to the root of the remote.
        return urlunparse(parsed_url._replace(path=os.path.join(root_dir, relative)))

    with ProgressBar(message='Parsing Metadata') as parse_pb:
        for key in read_specs(result.path):
            basename = key.name + '-' + key.version
            gem_relative_path = os.path.join('gems', basename + '.gem')
            gem_url = remote_url(gem_relative_path)
            spec_relative_path = os.path.join('quick/Marshal.4.8', basename + '.gemspec.rz')
            spec_url = remote_url(spec_relative_path)

            gem = GemContent(name=key.name, version=key.version)
            d_artifacts = [
                DeclarativeArtifact(Artifact(), gem_url, gem_relative_path, self.remote),
                DeclarativeArtifact(Artifact(), spec_url, spec_relative_path, self.remote),
            ]
            d_content = DeclarativeContent(content=gem, d_artifacts=d_artifacts)
            parse_pb.increment()
            await out_q.put(d_content)

    await out_q.put(None)
def deferred_sync(self, delta):
    """
    Synchronize the repository with the remote repository without downloading artifacts.

    Content is created and associated for every group produced from the additions.

    Args:
        delta (namedtuple): Only `delta.additions` is used in this method.
    """
    additions = delta.additions
    description = _("Adding file content to the repository without downloading artifacts.")
    groups = self.next_group(additions)
    with ProgressBar(message=description, total=len(additions)) as bar:
        for content_group in groups:
            self._create_and_associate_content(content_group)
            bar.increment()
async def __call__(self, in_q, out_q):
    """
    The coroutine for this stage.

    Incoming content whose natural key is not found in `self.unit_keys_by_type` is added
    to `self.new_version`. Keys still left in `self.unit_keys_by_type` after the input is
    exhausted are turned into one queryset per content type and put on `out_q`.

    Args:
        in_q (:class:`asyncio.Queue`): Each item is a
            :class:`~pulpcore.plugin.stages.DeclarativeContent` with saved `content` that
            needs to be associated.
        out_q (:class:`asyncio.Queue`): Each item is a
            :class:`django.db.models.query.QuerySet` of
            :class:`~pulpcore.plugin.models.Content` subclass that are already associated
            but not included in the stream of items from `in_q`. One
            :class:`django.db.models.query.QuerySet` is put for each
            :class:`~pulpcore.plugin.models.Content` type.

    Returns:
        The coroutine for this stage.
    """
    with ProgressBar(message='Associating Content') as pb:
        async for batch in self.batches(in_q):
            # `Q(pk=None)` is the starting point every unit query gets OR-ed onto.
            content_q_by_type = defaultdict(lambda: Q(pk=None))
            for declarative_content in batch:
                try:
                    # A key that can be removed here is already tracked for its type, so
                    # nothing has to be added for this unit.
                    unit_key = declarative_content.content.natural_key()
                    self.unit_keys_by_type[type(
                        declarative_content.content)].remove(unit_key)
                except KeyError:
                    # Key not tracked: collect a query matching this unit so it gets
                    # added to the new version below.
                    model_type = type(declarative_content.content)
                    unit_key_dict = declarative_content.content.natural_key_dict(
                    )
                    unit_q = Q(**unit_key_dict)
                    content_q_by_type[model_type] = content_q_by_type[
                        model_type] | unit_q

            # One add_content() call per content type seen in this batch.
            for model_type, q_object in content_q_by_type.items():
                queryset = model_type.objects.filter(q_object)
                self.new_version.add_content(queryset)
                pb.done = pb.done + queryset.count()
                pb.save()

        # Whatever keys were never removed above did not show up in the stream; hand them
        # to the next stage as one queryset per type.
        for unit_type, ids in self.unit_keys_by_type.items():
            if ids:
                units_to_unassociate = Q()
                for unit_key in self.unit_keys_by_type[unit_type]:
                    # Pair each natural key field name with the value at the same
                    # position in the key.
                    query_dict = {}
                    for i, key_name in enumerate(
                            unit_type.natural_key_fields()):
                        query_dict[key_name] = unit_key[i]
                    units_to_unassociate |= Q(**query_dict)
                await out_q.put(
                    unit_type.objects.filter(units_to_unassociate))
    # Signal the end of the stream to the next stage.
    await out_q.put(None)
async def run(self):
    """
    The coroutine for this stage.

    Content is fetched from `self.items()` by one task at a time while each fetched
    content unit is handled in its own task. The results of the handler tasks are added
    to the `done` counter of the progress report.

    Returns:
        The coroutine for this stage.
    """
    def _add_to_pending(coro):
        # Wrap `coro` in a task, track it in `pending` and return the task.
        nonlocal pending
        task = asyncio.ensure_future(coro)
        pending.add(task)
        return task

    #: (set): The set of unfinished tasks. Contains the content
    #    handler tasks and may contain `content_get_task`.
    pending = set()

    content_iterator = self.items()

    #: (:class:`asyncio.Task`): The task that gets new content from `self._in_q`.
    #    Set to None if stage is shutdown.
    content_get_task = _add_to_pending(content_iterator.__anext__())

    with ProgressBar(message='Downloading Artifacts') as pb:
        try:
            while pending:
                # Wake up as soon as either new content arrived or a handler finished.
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for task in done:
                    if task is content_get_task:
                        try:
                            # `task.result()` raises StopAsyncIteration once the
                            # iterator is exhausted.
                            _add_to_pending(
                                self._handle_content_unit(task.result()))
                        except StopAsyncIteration:
                            # previous stage is finished and we retrieved all
                            # content instances: shutdown
                            content_get_task = None
                    else:
                        pb.done += task.result()  # download_count
                        pb.save()

                # Only request more content if the previous request has completed and
                # the number of unfinished tasks is below the limit.
                if content_get_task and content_get_task not in pending:  # not yet shutdown
                    if len(pending) < self.max_concurrent_content:
                        content_get_task = _add_to_pending(
                            content_iterator.__anext__())
        except asyncio.CancelledError:
            # asyncio.wait does not cancel its tasks when cancelled, we need to do this
            for future in pending:
                future.cancel()
            raise
async def run(self):
    """
    The coroutine for this stage.

    Every queryset received is removed from `self.new_version`, counted in the progress
    report and passed on.

    Returns:
        The coroutine for this stage.
    """
    with ProgressBar(message='Un-Associating Content') as pb:
        async for queryset in self.items():
            self.new_version.remove_content(queryset)
            removed = queryset.count()
            pb.done = pb.done + removed
            pb.save()
            await self.put(queryset)
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the "universe" metadata of the remote.

    If a cookbook specifier is set in the remote, only the cookbooks named by this
    specifier are emitted.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input
            queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to. A final
            `None` is put once all cookbooks have been emitted.
    """
    with ProgressBar(message='Downloading Metadata', total=1) as download_pb:
        universe_url = urljoin(self.remote.url + '/', 'universe')
        result = await self.remote.get_downloader(url=universe_url).run()
        download_pb.increment()

    cookbook_names = self.remote.specifier_cookbook_names()

    with ProgressBar(message='Parsing Metadata') as parse_pb:
        for entry in Universe(result.path).read():
            # An empty/unset specifier means "sync everything".
            wanted = not cookbook_names or entry.name in cookbook_names
            if not wanted:
                continue
            cookbook = CookbookPackageContent(
                name=entry.name,
                version=entry.version,
                dependencies=entry.dependencies,
            )
            d_artifact = DeclarativeArtifact(
                Artifact(), entry.download_url, cookbook.relative_path(), self.remote)
            d_content = DeclarativeContent(content=cookbook, d_artifacts=[d_artifact])
            parse_pb.increment()
            await out_q.put(d_content)

    await out_q.put(None)
async def run(self): """ The coroutine doing the stage's work. """ #: (set): The set of unfinished tasks. Contains the content # handler tasks and may contain `self._content_get_task`. self._pending = set() #: (:class:`asyncio.Task`): The task that gets new content from `in_q`. # Set to None if stage is shutdown. self._content_get_task = self._add_to_pending(self.in_q.get()) #: (:class:`asyncio.Semaphore`): Semaphore controlling the number of concurrent downloads self._download_semaphore = asyncio.Semaphore( value=self.max_concurrent_downloads) with ProgressBar(message='Downloading Artifacts') as pb: try: while self._pending: done, self._pending = await asyncio.wait( self._pending, return_when=asyncio.FIRST_COMPLETED) for task in done: if task is self._content_get_task: content = task.result() if content is None: # previous stage is finished and we retrieved all # content instances: shutdown self._content_get_task = None else: self._add_to_pending( self._handle_content_unit(content)) else: download_count = task.result() pb.done += download_count pb.save() if not self.shutdown: if not self.saturated and self._content_get_task not in self._pending: self._content_get_task = self._add_to_pending( self.in_q.get()) except asyncio.CancelledError: # asyncio.wait does not cancel its tasks when cancelled, we need to do this for future in self._pending: future.cancel() raise await self.out_q.put(None)
def publish(self):
    """
    Publish the repository.

    The entries produced by `self._publish()` are written, one per line, to a
    `PULP_MANIFEST` file in the current working directory. The file is then saved as
    `PublishedMetadata` of `self.publication`.
    """
    with ProgressBar(message=_("Publishing repository metadata"), total=1) as bar:
        manifest_name = 'PULP_MANIFEST'
        with open(manifest_name, 'w+') as fp:
            for entry in self._publish():
                fp.write(entry)
                fp.write('\n')

        # Re-open the manifest for reading in a context manager so the file handle is
        # closed once the metadata has been saved instead of being leaked.
        with open(manifest_name, 'rb') as manifest_file:
            metadata = PublishedMetadata(
                relative_path=os.path.basename(manifest_name),
                publication=self.publication,
                file=File(manifest_file))
            metadata.save()
        bar.increment()
async def _fetch_galaxy_pages(self):
    """
    Fetch the roles in a remote repository.

    The first page is downloaded on its own to learn the total number of roles; the
    remaining pages are downloaded concurrently and yielded in completion order.

    Returns:
        async generator: dicts that represent pages from galaxy api
    """
    remote = self.remote

    def role_page_url(url, page=1):
        # Return `url` with its `page` query parameter set to `page`.
        parsed_url = urlparse(url)
        new_query = parse_qs(parsed_url.query)
        new_query['page'] = page
        return urlunparse(
            parsed_url._replace(query=urlencode(new_query, doseq=True)))

    def parse_metadata(download_result):
        # Load the JSON document a downloader stored at `download_result.path`.
        with open(download_result.path) as fd:
            return json.load(fd)

    with ProgressBar(
            message='Parsing Pages from Galaxy Roles API') as progress_bar:
        downloader = remote.get_downloader(url=role_page_url(remote.url))
        metadata = parse_metadata(await downloader.run())

        page_count = math.ceil(float(metadata['count']) / float(PAGE_SIZE))
        progress_bar.total = page_count
        progress_bar.save()

        yield metadata
        progress_bar.increment()

        # Concurrent downloads are limited by aiohttp...
        # Each coroutine is wrapped in a task: asyncio.wait() no longer accepts bare
        # coroutines on Python 3.11+.
        not_done = {
            asyncio.ensure_future(
                remote.get_downloader(url=role_page_url(remote.url, page)).run())
            for page in range(2, page_count + 1)
        }

        while not_done:
            done, not_done = await asyncio.wait(
                not_done, return_when=FIRST_COMPLETED)
            for item in done:
                yield parse_metadata(item.result())
                progress_bar.increment()
def full_sync(self, delta):
    """
    Synchronize the repository with the remote repository and download artifacts.

    Groups produced from `delta.additions` are downloaded; content is created and
    associated only for groups in which no downloaded file reported an exception.

    Args:
        delta (namedtuple): Only `delta.additions` is used in this method.
    """
    description = _("Dowloading artifacts and adding content to the repository.")
    downloader = GroupDownloader(self)
    downloader.schedule_from_iterator(self.next_group(delta.additions))

    with ProgressBar(message=description, total=len(delta.additions)) as bar:
        for group in downloader:
            download_error = any(
                result.exception for result in group.downloaded_files.values())
            if download_error:
                # Groups with a failed download are skipped and not counted.
                continue
            self._create_and_associate_content(group)
            bar.increment()
            log.warning('content_unit = {0}'.format(group.id))
async def run(self):
    """
    Update package content with the information obtained from its artifact.

    For every `Package` unit whose first artifact is not in the "adding" state, the
    control paragraph of the deb file is parsed and its fields are set on the package.
    All units are passed on.
    """
    with ProgressBar(message="Update Package units") as pb:
        async for d_content in self.items():
            package = d_content.content
            if isinstance(package, Package):
                package_artifact = d_content.d_artifacts[0].artifact
                if not package_artifact._state.adding:
                    deb = debfile.DebFile(fileobj=package_artifact.file)
                    package_dict = Package.from822(deb.debcontrol())
                    for field_name, field_value in package_dict.items():
                        setattr(package, field_name, field_value)
                    pb.increment()
            await self.put(d_content)
def sync(self):
    """
    Synchronize the repository with the remote repository.

    Content that already exists in Pulp is associated first, the remaining additions are
    synced (deferred or full, depending on `self.is_deferred`), and finally the content
    listed in the removals is removed from the repository.
    """
    # keys are unit keys and values are lists of deferred artifacts associated with the
    # content
    self.content_dict = {}
    self.monitors = {}
    delta = self._find_delta()

    # Find all content being added that already exists in Pulp and associate with
    # repository.
    fields = set(ExampleContent.natural_key_fields())
    if self.is_deferred:
        existing = ExampleContent.objects.find_by_unit_key(delta.additions, partial=True)
    else:
        # Filter out any content that still needs to have artifacts downloaded
        existing = ExampleContent.objects.find_by_unit_key(delta.additions)
    ready_to_associate = existing.only(*fields)

    added = self.associate_existing_content(ready_to_associate)
    delta = Delta(additions=delta.additions - added, removals=delta.removals)

    sync_remaining = self.deferred_sync if self.is_deferred else self.full_sync
    sync_remaining(delta)

    # Remove content if there is any to remove
    if not delta.removals:
        return

    with ProgressBar(message=_("Removing content from repository."),
                     total=len(delta.removals)) as bar:
        # Build a query that uniquely identifies all content that needs to be removed.
        removal_q = models.Q()
        for key in delta.removals:
            removal_q |= models.Q(examplecontent__path=key.path,
                                  examplecontent__digest=key.digest)
        q_set = self.repository.content.filter(removal_q)
        associations = RepositoryContent.objects.filter(repository=self.repository)
        # delete() returns a tuple whose first item is the number of deleted rows.
        bar.done = associations.filter(content__in=q_set).delete()[0]
async def run(self):
    """
    Build and emit `DeclarativeContent` from the ansible metadata.

    A role is emitted for every metadata entry. For each role a task is scheduled that
    adds the role versions once the role's get-or-create future is available; all of
    these tasks are awaited before the stage finishes.
    """
    with ProgressBar(message='Parsing Role Metadata') as pb:
        version_tasks = []
        async for metadata in self._fetch_roles():
            role = AnsibleRole(name=metadata['name'], namespace=metadata['namespace'])
            d_content = DeclarativeContent(
                content=role, d_artifacts=[], does_batch=False)
            add_versions = self._add_role_versions(
                d_content.get_or_create_future(), metadata)
            version_tasks.append(asyncio.ensure_future(add_versions))
            await self.put(d_content)
            pb.increment()
        await asyncio.gather(*version_tasks)
async def run(self):
    """
    DockerFirstStage.

    Download the tag list of the remote repository, download the document behind every
    tag, and emit declarative content for tags, manifest lists, manifests and blobs.
    """
    future_manifests = []
    tag_list = []
    to_download = []
    # Maps a manifest digest to the declarative content created for it.
    man_dcs = {}
    total_blobs = []

    with ProgressBar(message='Downloading tag list', total=1) as pb:
        repo_name = self.remote.namespaced_upstream_name
        relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
        tag_list_url = urljoin(self.remote.url, relative_url)
        list_downloader = self.remote.get_downloader(url=tag_list_url)
        await list_downloader.run(extra_data={'repo_name': repo_name})

        with open(list_downloader.path) as tags_raw:
            tags_dict = json.loads(tags_raw.read())
            tag_list = tags_dict['tags']

        # check for the presence of the pagination link header
        link = list_downloader.response_headers.get('Link')
        await self.handle_pagination(link, repo_name, tag_list)
        whitelist_tags = self.remote.whitelist_tags
        if whitelist_tags:
            # Keep only the tags that are also named in the comma separated whitelist.
            tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
        pb.increment()

    msg = 'Creating Download requests for v2 Tags'
    with ProgressBar(message=msg, total=len(tag_list)) as pb:
        for tag_name in tag_list:
            relative_url = '/v2/{name}/manifests/{tag}'.format(
                name=self.remote.namespaced_upstream_name,
                tag=tag_name,
            )
            url = urljoin(self.remote.url, relative_url)
            downloader = self.remote.get_downloader(url=url)
            # The coroutine is only collected here; it is awaited further below.
            to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))
            pb.increment()

    # These progress reports are not used as context managers; their state and total are
    # set explicitly once the corresponding work is finished.
    pb_parsed_tags = ProgressBar(message='Processing v2 Tags', state='running')
    pb_parsed_ml_tags = ProgressBar(message='Parsing Manifest List Tags', state='running')
    pb_parsed_m_tags = ProgressBar(message='Parsing Manifests Tags', state='running')
    # NOTE(review): module-level global; not incremented in this method -- presumably
    # updated by `handle_blobs`, confirm.
    global pb_parsed_blobs
    pb_parsed_blobs = ProgressBar(message='Parsing Blobs', state='running')
    pb_parsed_man = ProgressBar(message='Parsing Manifests', state='running')

    for download_tag in asyncio.as_completed(to_download):
        tag = await download_tag
        with open(tag.path) as content_file:
            raw = content_file.read()
        content_data = json.loads(raw)
        mediatype = content_data.get('mediaType')
        tag.artifact_attributes['file'] = tag.path
        saved_artifact = Artifact(**tag.artifact_attributes)
        try:
            saved_artifact.save()
        except IntegrityError:
            # Saving failed on an integrity constraint: look the artifact up by the
            # remaining attributes instead.
            del tag.artifact_attributes['file']
            saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
        tag_dc = self.create_tag(mediatype, saved_artifact, tag.url)

        if type(tag_dc.content) is ManifestListTag:
            list_dc = self.create_tagged_manifest_list(
                tag_dc, content_data)
            await self.put(list_dc)
            pb_parsed_ml_tags.increment()
            tag_dc.extra_data['list_relation'] = list_dc
            for manifest_data in content_data.get('manifests'):
                man_dc = self.create_manifest(list_dc, manifest_data)
                # Remember the future so the manifest can be read for its blobs below.
                future_manifests.append(man_dc.get_or_create_future())
                man_dcs[man_dc.content.digest] = man_dc
                await self.put(man_dc)
                pb_parsed_man.increment()
        elif type(tag_dc.content) is ManifestTag:
            man_dc = self.create_tagged_manifest(tag_dc, content_data)
            await self.put(man_dc)
            pb_parsed_m_tags.increment()
            tag_dc.extra_data['man_relation'] = man_dc
            self.handle_blobs(man_dc, content_data, total_blobs)
        await self.put(tag_dc)
        pb_parsed_tags.increment()

    pb_parsed_tags.state = 'completed'
    pb_parsed_tags.total = pb_parsed_tags.done
    pb_parsed_tags.save()
    pb_parsed_ml_tags.state = 'completed'
    pb_parsed_ml_tags.total = pb_parsed_ml_tags.done
    pb_parsed_ml_tags.save()
    pb_parsed_m_tags.state = 'completed'
    pb_parsed_m_tags.total = pb_parsed_m_tags.done
    pb_parsed_m_tags.save()
    pb_parsed_man.state = 'completed'
    pb_parsed_man.total = pb_parsed_man.done
    pb_parsed_man.save()

    # Read each manifest referenced by a manifest list and handle its blobs.
    for manifest_future in asyncio.as_completed(future_manifests):
        man = await manifest_future
        with man._artifacts.get().file.open() as content_file:
            raw = content_file.read()
        content_data = json.loads(raw)
        man_dc = man_dcs[man.digest]
        self.handle_blobs(man_dc, content_data, total_blobs)
    for blob in total_blobs:
        await self.put(blob)

    pb_parsed_blobs.state = 'completed'
    pb_parsed_blobs.total = pb_parsed_blobs.done
    pb_parsed_blobs.save()