@classmethod
async def migrate_to_pulp3(cls, pulp2distributor, repo_version):
    """
    Migrate distributor to Pulp 3.

    Args:
        pulp2distributor(Pulp2Distributor): Pre-migrated pulp2 distributor to migrate
        repo_version(RepositoryVersion): Pulp 3 repository version to publish and
            distribute; if None, the migrated repository's version is used

    Returns:
        publication and distribution: FilePublication and FileDistribution in Pulp3
        created(bool): True if Distribution has just been created;
            False if Distribution is an existing one
    """
    if not repo_version:
        repo_version = pulp2distributor.pulp2_repository.pulp3_repository_version
    publication = repo_version.publication_set.first()
    if not publication:
        # create publication
        with FilePublication.create(repo_version, pass_through=True) as publication:
            manifest = Manifest('PULP_MANIFEST')
            manifest.write(populate(publication))
            PublishedMetadata.create_from_file(
                file=File(open(manifest.relative_path, 'rb')),
                publication=publication)

    # create distribution
    pulp2_config = pulp2distributor.pulp2_config
    base_config = cls.parse_base_config(pulp2distributor, pulp2_config)
    base_config['base_path'] = pulp2_config.get(
        'relative_url', pulp2distributor.pulp2_repository.pulp2_repo_id)
    base_config['publication'] = publication
    distribution, created = FileDistribution.objects.update_or_create(**base_config)
    return publication, distribution, created
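# A hypothetical call site for the migrator above, to show the return values
# in use. FileDistributorMigrator is an assumed stand-in name for the class
# that owns migrate_to_pulp3(); real pulp2distributor objects come from the
# pulp2-to-pulp3 migration plan machinery.
async def migrate_one_distributor(pulp2distributor):
    publication, distribution, created = await FileDistributorMigrator.migrate_to_pulp3(
        pulp2distributor, repo_version=None)  # None: fall back to the migrated repo's version
    return distribution, created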
def publish(manifest, repository_version_pk):
    """
    Create a Publication based on a RepositoryVersion.

    Args:
        manifest (str): Filename to use for manifest file.
        repository_version_pk (str): Create a publication from this repository version.
    """
    repo_version = RepositoryVersion.objects.get(pk=repository_version_pk)

    log.info(
        _("Publishing: repository={repo}, version={ver}, manifest={manifest}").format(
            repo=repo_version.repository.name,
            ver=repo_version.number,
            manifest=manifest))

    with WorkingDirectory():
        with FilePublication.create(repo_version, pass_through=True) as publication:
            manifest = Manifest(manifest)
            manifest.write(populate(publication))
            PublishedMetadata.create_from_file(
                file=File(open(manifest.relative_path, "rb")),
                publication=publication)

    log.info(_("Publication: {publication} created").format(publication=publication.pk))
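# For context: the Manifest/Entry helpers used throughout these snippets ship
# with the plugin. A minimal illustrative sketch, assuming PULP_MANIFEST is a
# plain-text file with one "relative_path,sha256_digest,size" line per file;
# the names below are stand-ins, not the plugin's actual implementation.
from collections import namedtuple

Entry = namedtuple('Entry', ('relative_path', 'digest', 'size'))


class ManifestSketch:
    def __init__(self, relative_path):
        self.relative_path = relative_path

    def write(self, entries):
        # One CSV-style line per entry.
        with open(self.relative_path, 'w') as fp:
            for entry in entries:
                fp.write('{},{},{}\n'.format(entry.relative_path, entry.digest, entry.size))

    def read(self):
        # Yield entries lazily so large manifests need not fit in memory.
        with open(self.relative_path) as fp:
            for line in fp:
                relative_path, digest, size = line.strip().split(',')
                yield Entry(relative_path, digest, int(size))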
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to
    """
    with ProgressBar(message='Downloading Metadata') as pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = os.path.dirname(parsed_url.path)
        downloader = self.remote.get_downloader(self.remote.url)
        result = await downloader.run()
        pb.increment()

    with ProgressBar(message='Parsing Metadata') as pb:
        manifest = Manifest(result.path)
        for entry in manifest.read():
            path = os.path.join(root_dir, entry.relative_path)
            url = urlunparse(parsed_url._replace(path=path))
            file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
            artifact = Artifact(size=entry.size, sha256=entry.digest)
            da = DeclarativeArtifact(artifact, url, entry.relative_path, self.remote)
            dc = DeclarativeContent(content=file, d_artifacts=[da])
            pb.increment()
            await out_q.put(dc)
    await out_q.put(None)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.
    """
    deferred_download = (self.remote.policy != Remote.IMMEDIATE)  # Interpret download policy

    with ProgressBar(message='Downloading Metadata') as pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = os.path.dirname(parsed_url.path)
        downloader = self.remote.get_downloader(url=self.remote.url)
        result = await downloader.run()
        pb.increment()

    with ProgressBar(message='Parsing Metadata') as pb:
        manifest = Manifest(result.path)
        for entry in manifest.read():
            path = os.path.join(root_dir, entry.relative_path)
            url = urlunparse(parsed_url._replace(path=path))
            file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
            artifact = Artifact(size=entry.size, sha256=entry.digest)
            da = DeclarativeArtifact(
                artifact=artifact,
                url=url,
                relative_path=entry.relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            dc = DeclarativeContent(content=file, d_artifacts=[da])
            pb.increment()
            await self.put(dc)
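# A first stage like the one above is normally handed to pulpcore's
# DeclarativeVersion, which runs the rest of the download/save pipeline.
# A minimal sketch; FileFirstStage is an assumed name for the class that
# owns the run() method above.
from pulpcore.plugin.stages import DeclarativeVersion


def synchronize(remote, repository, mirror=False):
    """Create a new repository version by syncing `repository` from `remote`."""
    first_stage = FileFirstStage(remote)
    return DeclarativeVersion(first_stage, repository, mirror=mirror).create()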
def publish(publisher_pk, repository_version_pk):
    """
    Use provided publisher to create a Publication based on a RepositoryVersion.

    Args:
        publisher_pk (str): Use the publish settings provided by this publisher.
        repository_version_pk (str): Create a publication from this repository version.
    """
    publisher = FilePublisher.objects.get(pk=publisher_pk)
    repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)

    log.info(
        _('Publishing: repository=%(repository)s, version=%(version)d, publisher=%(publisher)s'),
        {
            'repository': repository_version.repository.name,
            'version': repository_version.number,
            'publisher': publisher.name,
        })

    with WorkingDirectory():
        with Publication.create(repository_version, publisher) as publication:
            manifest = Manifest('PULP_MANIFEST')
            manifest.write(populate(publication))
            metadata = PublishedMetadata(
                relative_path=os.path.basename(manifest.relative_path),
                publication=publication,
                file=File(open(manifest.relative_path, 'rb')))
            metadata.save()

    log.info(
        _('Publication: %(publication)s created'),
        {
            'publication': publication.pk
        })
def publish(publisher_pk, repository_version_pk):
    """
    Use provided publisher to create a Publication based on a RepositoryVersion.

    Args:
        publisher_pk (str): Use the publish settings provided by this publisher.
        repository_version_pk (str): Create a publication from this repository version.
    """
    publisher = FilePublisher.objects.get(pk=publisher_pk)
    repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)

    log.info(_('Publishing: repository={repo}, version={ver}, publisher={pub}').format(
        repo=repository_version.repository.name,
        ver=repository_version.number,
        pub=publisher.name,
    ))

    with WorkingDirectory():
        with Publication.create(repository_version, publisher, pass_through=True) as publication:
            manifest = Manifest(publisher.manifest)
            manifest.write(populate(publication))
            metadata = PublishedMetadata(
                relative_path=os.path.basename(manifest.relative_path),
                publication=publication,
                file=File(open(manifest.relative_path, 'rb')))
            metadata.save()

    log.info(_('Publication: {publication} created').format(publication=publication.pk))
def _fetch_manifest(self):
    """
    Fetch (download) the manifest.
    """
    parsed_url = urlparse(self._importer.feed_url)
    download = self._importer.get_futures_downloader(
        self._importer.feed_url, os.path.basename(parsed_url.path))
    download()
    self._manifest = Manifest(download.writer.path)
def publish(self):
    """
    Publish the repository.
    """
    manifest = Manifest('PULP_MANIFEST')
    manifest.write(self._publish())
    metadata = PublishedMetadata(
        relative_path=os.path.basename(manifest.path),
        publication=self.publication,
        file=File(open(manifest.path, 'rb')))
    metadata.save()
def fetch_manifest(remote):
    """
    Fetch (download) the manifest.

    Args:
        remote (FileRemote): A remote.
    """
    downloader = remote.get_downloader(remote.url)
    downloader.fetch()
    return Manifest(downloader.path)
def fetch_manifest(importer):
    """
    Fetch (download) the manifest.

    Args:
        importer (FileImporter): An importer.
    """
    downloader = importer.get_downloader(importer.feed_url)
    downloader.fetch()
    return Manifest(downloader.path)
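# Hypothetical call site: download the manifest once, then materialize its
# entries to drive the rest of the sync (entry attribute names vary across
# the versions shown in these snippets).
def inventory_remote(importer):
    manifest = fetch_manifest(importer)
    return list(manifest.read())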
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.
    """
    global metadata_files
    deferred_download = self.remote.policy != Remote.IMMEDIATE  # Interpret download policy

    async with ProgressReport(
        message="Downloading Metadata", code="sync.downloading.metadata"
    ) as pb:
        parsed_url = urlparse(self.url)
        root_dir = os.path.dirname(parsed_url.path)
        downloader = self.remote.get_downloader(url=self.url)
        result = await downloader.run()
        await pb.aincrement()
        metadata_files.append((result.path, self.url.split("/")[-1]))

    async with ProgressReport(
        message="Parsing Metadata Lines", code="sync.parsing.metadata"
    ) as pb:
        manifest = Manifest(result.path)
        entries = list(manifest.read())
        pb.total = len(entries)
        await pb.asave()

        for entry in entries:
            path = os.path.join(root_dir, entry.relative_path)
            url = urlunparse(parsed_url._replace(path=path))
            file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
            artifact = Artifact(size=entry.size, sha256=entry.digest)
            da = DeclarativeArtifact(
                artifact=artifact,
                url=url,
                relative_path=entry.relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            dc = DeclarativeContent(content=file, d_artifacts=[da])
            await pb.aincrement()
            await self.put(dc)
class Synchronizer:
    """
    Repository synchronizer for FileContent

    This object walks through the full standard workflow of running a sync.
    See the "run" method for details on that workflow.
    """

    def __init__(self, importer):
        """
        Args:
            importer (Importer): the importer to use for the sync operation
        """
        self._importer = importer
        self._manifest = None
        self._inventory_keys = set()
        self._keys_to_add = set()
        self._keys_to_remove = set()

    def run(self):
        """
        Synchronize the repository with the remote repository.

        This walks through the standard workflow that most sync operations want to
        follow. This pattern is a recommended starting point for other plugins.

        - Determine what is available remotely.
        - Determine what is already in the local repository.
        - Compare those two, and based on any importer settings or content-type-specific
          logic, figure out what you want to add and remove from the local repository.
        - Use a ChangeSet to make those changes happen.
        """
        # Determine what is available remotely
        self._fetch_manifest()
        # Determine what is already in the repo
        self._fetch_inventory()

        # Based on the above two, figure out what we want to add and remove
        self._find_delta()
        additions = SizedIterable(
            self._build_additions(),
            len(self._keys_to_add))
        removals = SizedIterable(
            self._build_removals(),
            len(self._keys_to_remove))

        # Hand that to a ChangeSet, and we're done!
        changeset = ChangeSet(self._importer, additions=additions, removals=removals)
        changeset.apply_and_drain()

    def _fetch_manifest(self):
        """
        Fetch (download) the manifest.
        """
        parsed_url = urlparse(self._importer.feed_url)
        download = self._importer.get_futures_downloader(
            self._importer.feed_url, os.path.basename(parsed_url.path))
        download()
        self._manifest = Manifest(download.writer.path)

    def _fetch_inventory(self):
        """
        Fetch existing content in the repository.
        """
        q_set = FileContent.objects.filter(repositories=self._importer.repository)
        q_set = q_set.only(*[f.name for f in FileContent.natural_key_fields])
        for content in (c.cast() for c in q_set):
            key = Key(path=content.path, digest=content.digest)
            self._inventory_keys.add(key)

    def _find_delta(self, mirror=True):
        """
        Using the manifest and set of existing (natural) keys, determine the set of
        content to be added and deleted from the repository. Expressed in natural key.

        Args:
            mirror (bool): Faked mirror option.
                TODO: should be replaced with something standard.
        """
        # These keys are available remotely. Storing just the natural key makes it
        # memory-efficient and thus reasonable to hold in RAM even with a large
        # number of content units.
        remote_keys = set(
            [Key(path=e.path, digest=e.digest) for e in self._manifest.read()])

        self._keys_to_add = remote_keys - self._inventory_keys
        if mirror:
            self._keys_to_remove = self._inventory_keys - remote_keys

    def _build_additions(self):
        """
        Generate the content to be added.

        This makes a second pass through the manifest. While it does not matter a lot
        for this plugin specifically, many plugins cannot hold the entire index of
        remote content in memory at once. They must reduce that to only the natural
        keys, decide which to retrieve (self.keys_to_add in our case), and then
        re-iterate the index to access each full entry one at a time.

        Returns:
            generator: A generator of content to be added.
        """
        parsed_url = urlparse(self._importer.feed_url)
        root_dir = os.path.dirname(parsed_url.path)

        for entry in self._manifest.read():
            # Determine if this is an entry we decided to add.
            key = Key(path=entry.path, digest=entry.digest)
            if key not in self._keys_to_add:
                continue

            # Instantiate the content and artifact based on the manifest entry.
            path = os.path.join(root_dir, entry.path)
            url = urlunparse(parsed_url._replace(path=path))
            file = FileContent(path=entry.path, digest=entry.digest)
            artifact = Artifact(size=entry.size, sha256=entry.digest)

            # Now that we know what we want to add, hand it to "core" with the API objects.
            content = PendingContent(
                file,
                artifacts={PendingArtifact(artifact, url, entry.path)})
            yield content

    def _build_removals(self):
        """
        Generate the content to be removed.

        Returns:
            generator: A generator of FileContent instances to remove from the repository
        """
        for natural_keys in BatchIterator(self._keys_to_remove):
            q = models.Q()
            for key in natural_keys:
                q |= models.Q(filecontent__path=key.path, filecontent__digest=key.digest)
            q_set = self._importer.repository.content.filter(q)
            q_set = q_set.only('id')
            for content in q_set:
                yield content
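# Hypothetical sync entry point driving the Synchronizer above; real task
# plumbing (locking/reservations) is omitted. FileImporter and
# WorkingDirectory are the same names used elsewhere in these snippets.
def sync(importer_pk):
    importer = FileImporter.objects.get(pk=importer_pk)
    with WorkingDirectory():
        Synchronizer(importer).run()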