コード例 #1
0
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        The file at ``self.remote.url`` is downloaded and read as a ``Manifest``. Every manifest
        entry becomes a ``FileContent`` with a single ``DeclarativeArtifact``, wrapped in a
        ``DeclarativeContent`` that is handed to ``self.put()``.
        """
        # Every policy other than IMMEDIATE is treated as a deferred download.
        is_deferred = self.remote.policy != Remote.IMMEDIATE

        with ProgressBar(message='Downloading Metadata') as download_progress:
            feed = urlparse(self.remote.url)
            base_path = os.path.dirname(feed.path)
            manifest_download = await self.remote.get_downloader(url=self.remote.url).run()
            download_progress.increment()

        with ProgressBar(message='Parsing Metadata') as parse_progress:
            for item in Manifest(manifest_download.path).read():
                # Entry paths are resolved against the directory part of the remote URL's path;
                # all other URL components are reused unchanged.
                item_path = os.path.join(base_path, item.relative_path)
                item_url = urlunparse(feed._replace(path=item_path))
                content_unit = FileContent(relative_path=item.relative_path, digest=item.digest)
                declared_artifact = DeclarativeArtifact(
                    artifact=Artifact(size=item.size, sha256=item.digest),
                    url=item_url,
                    relative_path=item.relative_path,
                    remote=self.remote,
                    deferred_download=is_deferred,
                )
                declared_content = DeclarativeContent(
                    content=content_unit,
                    d_artifacts=[declared_artifact],
                )
                # The progress bar is advanced before the content is emitted.
                parse_progress.increment()
                await self.put(declared_content)
コード例 #2
0
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        The file at ``self.remote.url`` is downloaded and read as a ``Manifest``. For every
        manifest entry, a ``DeclarativeContent`` wrapping a ``FileContent`` and one
        ``DeclarativeArtifact`` is put onto ``out_q``. After the last entry, ``None`` is put
        onto ``out_q``.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The queue that receives the `DeclarativeContent` objects.
        """
        with ProgressBar(message='Downloading Metadata') as download_progress:
            feed = urlparse(self.remote.url)
            base_path = os.path.dirname(feed.path)
            manifest_download = await self.remote.get_downloader(self.remote.url).run()
            download_progress.increment()

        with ProgressBar(message='Parsing Metadata') as parse_progress:
            for item in Manifest(manifest_download.path).read():
                # Entry paths are resolved against the directory part of the remote URL's path;
                # all other URL components are reused unchanged.
                item_path = os.path.join(base_path, item.relative_path)
                item_url = urlunparse(feed._replace(path=item_path))
                content_unit = FileContent(relative_path=item.relative_path, digest=item.digest)
                declared_artifact = DeclarativeArtifact(
                    Artifact(size=item.size, sha256=item.digest),
                    item_url,
                    item.relative_path,
                    self.remote,
                )
                declared_content = DeclarativeContent(
                    content=content_unit,
                    d_artifacts=[declared_artifact],
                )
                # The progress bar is advanced before the content is queued.
                parse_progress.increment()
                await out_q.put(declared_content)

        # NOTE(review): the trailing None presumably marks the end of the stream for the
        # consumer of out_q; confirm against the pipeline that reads it.
        await out_q.put(None)
コード例 #3
0
ファイル: synchronizing.py プロジェクト: daviddavis/pulp_file
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        The file at ``self.url`` is downloaded and read as a ``Manifest``. The downloaded file's
        path and the last ``/``-separated segment of ``self.url`` are appended to the
        module-level ``metadata_files`` list. Every manifest entry then becomes a
        ``FileContent`` with a single ``DeclarativeArtifact``, wrapped in a
        ``DeclarativeContent`` that is handed to ``self.put()``.
        """
        global metadata_files

        # Every policy other than IMMEDIATE is treated as a deferred download.
        is_deferred = self.remote.policy != Remote.IMMEDIATE

        async with ProgressReport(
            message="Downloading Metadata", code="sync.downloading.metadata"
        ) as download_report:
            feed = urlparse(self.url)
            base_path = os.path.dirname(feed.path)
            manifest_download = await self.remote.get_downloader(url=self.url).run()
            await download_report.aincrement()
            manifest_name = self.url.rsplit("/", 1)[-1]
            metadata_files.append((manifest_download.path, manifest_name))

        async with ProgressReport(
            message="Parsing Metadata Lines", code="sync.parsing.metadata"
        ) as parse_report:
            # The manifest is read completely up front so the report's total can be set and
            # saved before any entry is processed.
            manifest_items = list(Manifest(manifest_download.path).read())
            parse_report.total = len(manifest_items)
            await parse_report.asave()

            for item in manifest_items:
                # Entry paths are resolved against the directory part of the URL's path; all
                # other URL components are reused unchanged.
                item_path = os.path.join(base_path, item.relative_path)
                item_url = urlunparse(feed._replace(path=item_path))
                content_unit = FileContent(relative_path=item.relative_path, digest=item.digest)
                declared_artifact = DeclarativeArtifact(
                    artifact=Artifact(size=item.size, sha256=item.digest),
                    url=item_url,
                    relative_path=item.relative_path,
                    remote=self.remote,
                    deferred_download=is_deferred,
                )
                declared_content = DeclarativeContent(
                    content=content_unit,
                    d_artifacts=[declared_artifact],
                )
                # The report is advanced before the content is emitted.
                await parse_report.aincrement()
                await self.put(declared_content)
コード例 #4
0
class Synchronizer:
    """
    Repository synchronizer for FileContent.

    An instance drives one sync for a single importer: it downloads the remote manifest, reads
    the keys of the content already in the repository, works out which content to add and
    remove, and applies the result through a ChangeSet. See the "run" method for the workflow.
    """
    def __init__(self, importer):
        """
        Args:
            importer (Importer): the importer to use for the sync operation
        """
        self._importer = importer
        # Set by _fetch_manifest().
        self._manifest = None
        # Keys of content already in the repository; filled by _fetch_inventory().
        self._inventory_keys = set()
        # Results of _find_delta().
        self._keys_to_add = set()
        self._keys_to_remove = set()

    def run(self):
        """
        Synchronize the repository with the remote repository.

        This walks through the standard workflow that most sync operations want to follow. This
        pattern is a recommended starting point for other plugins.

        - Determine what is available remotely.
        - Determine what is already in the local repository.
        - Compare those two, and based on any importer settings or content-type-specific logic,
          figure out what you want to add and remove from the local repository.
        - Use a ChangeSet to make those changes happen.
        """
        # Gather both sides of the comparison: the remote manifest and the local inventory.
        self._fetch_manifest()
        self._fetch_inventory()

        # Decide which keys need to be added and which need to be removed.
        self._find_delta()

        # Each generator is paired with the number of keys it was derived from.
        additions = SizedIterable(self._build_additions(), len(self._keys_to_add))
        removals = SizedIterable(self._build_removals(), len(self._keys_to_remove))

        # The ChangeSet performs the actual additions and removals.
        ChangeSet(self._importer, additions=additions, removals=removals).apply_and_drain()

    def _fetch_manifest(self):
        """
        Fetch (download) the manifest.

        The importer's feed URL is downloaded under the base name of the URL's path, and the
        downloaded file is wrapped in a ``Manifest`` stored on ``self._manifest``.
        """
        feed_url = self._importer.feed_url
        file_name = os.path.basename(urlparse(feed_url).path)
        download = self._importer.get_futures_downloader(feed_url, file_name)
        download()
        self._manifest = Manifest(download.writer.path)

    def _fetch_inventory(self):
        """
        Fetch existing content in the repository.

        The key (path and digest) of every FileContent associated with the importer's
        repository is added to ``self._inventory_keys``.
        """
        # Only the natural key fields are requested from the database.
        key_field_names = [field.name for field in FileContent.natural_key_fields]
        in_repository = FileContent.objects.filter(repositories=self._importer.repository)
        for row in in_repository.only(*key_field_names):
            unit = row.cast()
            self._inventory_keys.add(Key(path=unit.path, digest=unit.digest))

    def _find_delta(self, mirror=True):
        """
        Using the manifest and set of existing (natural) keys,
        determine the set of content to be added and deleted from the
        repository.  Expressed in natural key.

        Args:
            mirror (bool): Faked mirror option. When false, the set of keys to remove is left
                as it was.
                TODO: should be replaced with something standard.

        """
        # These keys are available remotely. Storing just the natural key makes it memory-efficient
        # and thus reasonable to hold in RAM even with a large number of content units.
        remote_keys = {
            Key(path=item.path, digest=item.digest) for item in self._manifest.read()
        }

        self._keys_to_add = remote_keys - self._inventory_keys
        if not mirror:
            return
        self._keys_to_remove = self._inventory_keys - remote_keys

    def _build_additions(self):
        """
        Generate the content to be added.

        This makes a second pass through the manifest. While it does not matter a lot for this
        plugin specifically, many plugins cannot hold the entire index of remote content in memory
        at once. They must reduce that to only the natural keys, decide which to retrieve
        (self._keys_to_add in our case), and then re-iterate the index to access each full entry
        one at a time.

        Returns:
            generator: A generator of content to be added.
        """
        feed = urlparse(self._importer.feed_url)
        base_path = os.path.dirname(feed.path)

        for item in self._manifest.read():
            # Only entries that _find_delta() selected for addition are emitted.
            if Key(path=item.path, digest=item.digest) in self._keys_to_add:
                # Entry paths are resolved against the directory part of the feed URL's path.
                item_url = urlunparse(
                    feed._replace(path=os.path.join(base_path, item.path))
                )
                content_unit = FileContent(path=item.path, digest=item.digest)
                artifact = Artifact(size=item.size, sha256=item.digest)

                # Hand the new content and its single artifact to "core" as API objects.
                pending_artifact = PendingArtifact(artifact, item_url, item.path)
                yield PendingContent(content_unit, artifacts={pending_artifact})

    def _build_removals(self):
        """
        Generate the content to be removed.

        Returns:
            generator: A generator of FileContent instances to remove from the repository
        """
        for batch in BatchIterator(self._keys_to_remove):
            # OR together one (path, digest) condition per key so that each batch is looked up
            # with a single filter.
            condition = models.Q()
            for key in batch:
                condition |= models.Q(filecontent__path=key.path,
                                      filecontent__digest=key.digest)
            matches = self._importer.repository.content.filter(condition)
            yield from matches.only('id')