Ejemplo n.º 1
0
class DownloadStep(PluginStep, listener.DownloadEventListener):
    """
    A plugin step that feeds a collection of download requests to a nectar
    downloader and counts the outcomes through the download event callbacks.

    The step passes itself to the downloader as the event listener, so every
    succeeded or failed download updates the step's progress counters.
    """

    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Set the default parent and step_type for the Download step

        :param step_type: The id of the step this processes
        :type  step_type: str
        :param downloads: The DownloadRequests to process. Any iterable is accepted (for
                          example a generator); it is converted to a list the first time
                          the ``downloads`` property is read. Defaults to no downloads.
        :type  downloads: iterable of nectar.request.DownloadRequest
        :param repo: The repo this step operates on
        :type  repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The plugin call configuration
        :type  config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type  working_dir: str
        :param plugin_type: The type of the plugin
        :type  plugin_type: str
        :param description: The text description that will be displayed to users
        :type  description: basestring
        """

        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit,
                                           config=config, working_dir=working_dir,
                                           plugin_type=plugin_type)
        if downloads is not None:
            self._downloads = downloads
        else:
            self._downloads = []
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Set up the nectar downloader

        Reads the validation flag and the feed URL from the step's config, then builds
        either a local-file or a threaded HTTP downloader depending on the feed URL.

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)

    @property
    def downloads(self):
        """
        This lets the class be instantiated with "downloads" as a generator that
        gets lazily evaluated. This is helpful, because at the time of
        instantiation, it is probably not known what downloads will be
        required.

        The first read of a non-list value materializes it into a list, which is
        then cached and returned on every subsequent read.

        :return:    list of download requests (nectar.request.DownloadRequest)
        :rtype:     list
        """
        if not isinstance(self._downloads, list):
            self._downloads = list(self._downloads)
        return self._downloads

    def get_total(self):
        """
        Get total number of items to download

        :returns: number of DownloadRequests
        :rtype: int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        the main "do stuff" method. In this case, just kick off all the
        downloads.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download succeeds. Bump the successes counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: nectar.report.DownloadReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails. Bump the failure counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: nectar.report.DownloadReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step

        The parent's cancel logic always runs. The downloader is cancelled only if
        one exists: it is created in initialize(), so a step that is cancelled
        before it was initialized has nothing to stop.
        """
        super(DownloadStep, self).cancel()
        downloader = getattr(self, 'downloader', None)
        if downloader is not None:
            downloader.cancel()
Ejemplo n.º 2
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        Reads the sync options from the config, normalizes the feed URL so that it ends in a
        slash, builds the downloader (local-file or threaded HTTP, depending on the feed URL)
        and creates the progress report for this run.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        if ssl_validation is None:
            # NOTE(review): the fallback reuses CONFIG_VALIDATE_DEFAULT, the same constant used
            # as the download-validation default above -- confirm it is the intended default
            # for SSL validation.
            ssl_validation = constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # Record the cancelled state on the progress report first, then ask the downloader to
        # stop.
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        The failure is logged, recorded on the progress report according to the phase the sync
        is currently in, and the progress is then pushed out.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        logger.error(msg)
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase. During the ISO phase the failure is recorded against the ISO.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        Nothing happens unless the sync is in the ISO download phase.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the bytes received since the last notification for this ISO are added to
            # the running total.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will validate the ISO (when validation is enabled) and save its unit
        through the sync conduit. A ValueError raised while doing so is handled by treating
        the download as failed.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished.
        # NOTE(review): the original comment states that assigning the completed state is
        # automatically turned into a failed state when the progress report has collected
        # errors (see the progress report's _set_state() method) -- confirm against that class.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the given ISOs, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The ISOs we want to download. perform_sync() passes the list of ISOs
                         that are missing locally; the value must support len() and be
                         iterable more than once.
        :type  manifest: list of pulp_rpm.plugins.db.models.ISO
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests. The ISO rides along as the request's
        # data so that the callbacks can get it back from the report.
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for
                             iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError:    if the manifest could not be retrieved
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        # download_failed() sets this state when a download fails during the manifest phase.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        # Rewind the buffer so the manifest is parsed from the beginning
        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """

        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # Each collection below is walked exactly once to build a key -> object mapping; the
        # matching key set is then taken from that mapping. This avoids converting every unit
        # twice and keeps the result correct even if a collection can only be iterated once.

        # All the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict((_unit_key_str(models.ISO.from_unit(unit)), unit)
                                     for unit in existing_units)
        existing_unit_keys = set(existing_units_by_key)

        # The units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict((_unit_key_str(models.ISO.from_unit(unit)), unit)
                                          for unit in existing_repo_units)
        existing_repo_unit_keys = set(existing_repo_units_by_key)

        # The ISOs in the remote repository
        available_isos_by_key = dict((_unit_key_str(iso), iso) for iso in manifest)
        available_iso_keys = set(available_isos_by_key)

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = \
            (available_iso_keys & existing_unit_keys) - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_isos = [available_isos_by_key[k]
                              for k in available_iso_keys - existing_unit_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_units = [existing_repo_units_by_key[k]
                                for k in existing_repo_unit_keys - available_iso_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
Ejemplo n.º 3
0
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is configured
    to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config:   The keyword args used to initialize the Nectar
                             downloader configuration.
    :type download_config:   dict
    :ivar downloader:        The Nectar downloader used to fetch the requests.
    :type downloader:        nectar.downloaders.threaded.HTTPThreadedDownloader
    """
    def __init__(self, step_type, step_description, lazy_status_conduit,
                 download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param step_type:           The step type, passed through to the parent
                                    Step.
        :type  step_type:           str
        :param step_description:    Stored as this step's description.
        :type  step_description:    str
        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        # The concurrency comes from the 'lazy' section of the Pulp config;
        # every request carries the stream-request header and SSL validation
        # is always on.
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {
                PULP_STREAM_REQUEST_HEADER: 'true'
            },
            SSL_VALIDATION: True
        }
        # This step is its own event listener.
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config), self)

    def _process_block(self, item=None):
        """
        This block is called by the `process` loop. This is overridden because
        success and failure is determined during the EventListener callbacks,
        which will handle updating the progress. Since `item` is not used, this
        does not make use of `process_main` and simply calls the downloader.

        Inherited from Step.

        :param item: Unused.
        :type  item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        The total number of download requests so progress reporting occurs at
        the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype:  int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # Remove the deferred entry now that the download has started.
        query_set = DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID], unit_type_id=report.data[TYPE_ID])
        query_set.delete()

        try:
            # If the file exists and the checksum is valid, don't download it
            path_entry = report.data[UNIT_FILES][report.destination]
            catalog_entry = path_entry[CATALOG_ENTRY]
            self.validate_file(catalog_entry.path,
                               catalog_entry.checksum_algorithm,
                               catalog_entry.checksum)
            # Validation passed: count the file as a success without
            # fetching it again.
            path_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            msg = _('{path} has already been downloaded.').format(
                path=catalog_entry.path)
            _logger.debug(msg)
            # SkipLocation is not in the except clause below, so it
            # propagates to the caller.
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # It's either missing or incorrect, so download it
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        A file that fails validation is marked as not downloaded and counted
        as a failure. Once every file of the unit is flagged as downloaded, the
        unit itself is marked as downloaded.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport
        """
        # Reload the content unit
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id',
                                    '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(report.destination,
                               catalog_entry.checksum_algorithm,
                               catalog_entry.checksum)

            relative_path = os.path.relpath(catalog_entry.path,
                                            FileStorage.get_path(content_unit))
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point to the file
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(
                    set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination,
                                            location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError) as e:
            _logger.debug(
                _('Download of {path} failed: {reason}.').format(
                    path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
        self.report_progress()

        # Mark the entire unit as downloaded, if necessary.
        download_flags = [
            entry[PATH_DOWNLOADED]
            for entry in report.data[UNIT_FILES].values()
        ]
        if all(download_flags):
            _logger.debug(
                _('Marking content unit {type}:{id} as downloaded.').format(
                    type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
Ejemplo n.º 4
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Prepare everything a single sync run needs: the sync options, the feed URL, a configured
        downloader and a progress report.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT)

        # The manifest location is later derived from this URL with urljoin, which requires the
        # base to end with a slash.
        feed_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        if feed_url[-1] != '/':
            feed_url = feed_url + '/'
        self._repo_url = feed_url

        # Cast the numeric settings to the types used when building the downloader config.
        speed_limit = config.get(importer_constants.KEY_MAX_SPEED)
        speed_limit = None if speed_limit is None else float(speed_limit)
        concurrency = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if concurrency is None:
            concurrency = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        else:
            concurrency = int(concurrency)
        # NOTE(review): the fallback is CONFIG_VALIDATE_DEFAULT, the same constant used for
        # KEY_VALIDATE above -- confirm that sharing it for SSL validation is intended.
        verify_ssl = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        if verify_ssl is None:
            verify_ssl = constants.CONFIG_VALIDATE_DEFAULT

        downloader_config = DownloaderConfig(
            max_speed=speed_limit,
            max_concurrent=concurrency,
            ssl_client_cert=config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            ssl_client_key=config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            ssl_ca_cert=config.get(importer_constants.KEY_SSL_CA_CERT),
            ssl_validation=verify_ssl,
            proxy_url=config.get(importer_constants.KEY_PROXY_HOST),
            proxy_port=config.get(importer_constants.KEY_PROXY_PORT),
            proxy_username=config.get(importer_constants.KEY_PROXY_USER),
            proxy_password=config.get(importer_constants.KEY_PROXY_PASS),
            basic_auth_username=config.get(importer_constants.KEY_BASIC_AUTH_USER),
            basic_auth_password=config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            working_dir=common_utils.get_working_directory())

        # This instance is registered as the downloader's event listener so the download_*
        # callbacks defined on this class get invoked.
        if self._repo_url.lower().startswith('file'):
            downloader_class = LocalFileDownloader
        else:
            downloader_class = HTTPThreadedDownloader
        self.downloader = downloader_class(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        # Cache of the repository's units; filled on first use by _associate_unit().
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Report whether the configured download policy defers downloading.

        The policy defaults to immediate when the config does not set one; any other value
        counts as deferred.

        :return: True if deferred.
        :rtype: bool
        """
        configured_policy = self.config.get(importer_constants.DOWNLOAD_POLICY,
                                            importer_constants.DOWNLOAD_IMMEDIATE)
        is_immediate = configured_policy == importer_constants.DOWNLOAD_IMMEDIATE
        return not is_immediate

    def download_failed(self, report):
        """
        Downloader callback invoked whenever an individual download fails.

        The failure is logged, then recorded on the progress report: during the manifest phase
        the whole phase is marked as failed, during the ISO phase the ISO is added to the list of
        failed ISOs. In any other state only the progress update is sent.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        _logger.error(_('Failed to download %(url)s: %(error_msg)s.') % {
            'url': report.url, 'error_msg': report.error_msg})

        progress = self.progress_report
        if progress.state == progress.STATE_MANIFEST_IN_PROGRESS:
            # Losing the manifest fails the entire manifest phase.
            progress.state = progress.STATE_MANIFEST_FAILED
            progress.error_message = report.error_report
        elif progress.state == progress.STATE_ISOS_IN_PROGRESS:
            # report.data carries the ISO the request was created for.
            progress.add_failed_iso(report.data, report.error_report)
        progress.update_progress()

    def download_progress(self, report):
        """
        Downloader callback delivering the number of bytes fetched so far for one file. While
        ISOs are being downloaded, the newly received bytes are added to the progress report so
        the client can follow along; in any other state the call is ignored.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        iso = report.data
        # The report carries a running total, so only the delta since the last callback counts.
        progress.finished_bytes += report.bytes_downloaded - iso.bytes_downloaded
        iso.bytes_downloaded = report.bytes_downloaded
        progress.update_progress()

    def download_succeeded(self, report):
        """
        Downloader callback invoked when a file has been downloaded successfully. Outside of the
        ISO downloading stage nothing is done; during it, the ISO is optionally validated, saved
        to the database, associated with the repository and its content imported.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            # Only downloads made during the ISO stage are ISOs.
            return

        # Bring the downloaded byte count up to date first.
        self.download_progress(report)
        iso = report.data
        iso.set_storage_path(os.path.basename(report.destination))
        try:
            if self._validate_downloads:
                iso.validate_iso(report.destination)
            try:
                iso.save()
            except NotUniqueError:
                # A unit with this key is already stored; carry on with the stored document.
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            self._associate_unit(self.sync_conduit.repo, iso)
            iso.safe_import_content(report.destination)

            progress.num_isos_finished += 1
            progress.update_progress()
        except ValueError:
            # Presumably raised by validate_iso() for a bad file -- confirm. The download is then
            # reported through the regular failure path.
            self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Record the given units in the deferred downloading (lazy) catalog.

        Units without a ``url`` attribute are not eligible for the catalog and are skipped.
        A unit that has no storage path yet gets one derived from its name before the entry
        is written.

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for iso in (candidate for candidate in units if hasattr(candidate, "url")):
            if not iso.storage_path:
                iso.set_storage_path(iso.name)

            catalog_entry = LazyCatalogEntry()
            catalog_entry.importer_id = str(self.sync_conduit.importer_object_id)
            catalog_entry.unit_id = iso.id
            catalog_entry.unit_type_id = iso.type_id
            catalog_entry.path = iso.storage_path
            catalog_entry.url = iso.url
            catalog_entry.checksum = iso.checksum
            # The ISO model carries no checksum type of its own; it appears to use sha256, so
            # that is hard-coded until the model exposes a checksum type field.
            catalog_entry.checksum_algorithm = 'sha256'
            catalog_entry.save_revision()

    def perform_sync(self):
        """
        Run the sync as configured and return its report. Progress is published through the
        progress report built on the sync_conduit while the sync is running.

        Steps: download the manifest, work out which ISOs are missing locally / remotely,
        associate the ISOs Pulp already has, then either download the missing ones or (deferred
        policy) only register them, and finally remove units the feed no longer offers if the
        importer is configured to do so.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        progress = self.progress_report

        # Manifest phase.
        progress.state = progress.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # IOError: the manifest could not be retrieved at all.
            # ValueError: the PULP_MANIFEST file is not in the expected format.
            return progress.build_final_report()

        # Sort the ISOs into: to fetch, already in Pulp, and no longer offered by the feed.
        missing_locally, available_locally, missing_remotely = self._filter_missing_isos(
            manifest, self.download_deferred)

        # ISOs Pulp already holds only need to be associated with this repository ...
        for iso in available_locally:
            self._associate_unit(self.sync_conduit.repo, iso)
        # ... and registered in the deferred downloading (lazy) catalog.
        self.add_catalog_entries(available_locally)

        progress.state = progress.STATE_ISOS_IN_PROGRESS

        if not self.download_deferred:
            self._download_isos(missing_locally)
        else:
            # Deferred policy: store the units without content and catalog where to get it.
            for iso in missing_locally:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    # Already stored; associate the stored document instead of the new one.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)

        # Drop units the feed no longer offers, together with their catalog entries.
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, missing_remotely)
            for stale_unit in missing_remotely:
                LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=stale_unit.id,
                    unit_type_id=stale_unit.type_id).delete()

        # Mark the run as complete.
        # NOTE(review): the progress report's _set_state() is said to turn this into a failed
        # state when errors were collected; that logic is not visible here -- confirm.
        progress.state = progress.STATE_COMPLETE
        return progress.build_final_report()

    def _download_isos(self, manifest):
        """
        Download the given ISOs into per-ISO temporary directories. Storing them and recording
        them in the Pulp database happens in the download callbacks of this class.

        :param manifest: The ISOs we want to download; must support len(). perform_sync passes
                         the list of ISOs that are missing locally.
        :type  manifest: list of pulp_rpm.plugins.db.models.ISO
        """
        progress = self.progress_report
        progress.total_bytes = 0
        progress.num_isos = len(manifest)
        for iso in manifest:
            # Per-ISO counter used by download_progress() to compute deltas.
            iso.bytes_downloaded = 0
            progress.total_bytes += iso.size
        progress.update_progress()

        # Every ISO is downloaded into its own temporary directory below the working directory,
        # keeping the file name taken from its URL. The ISO itself travels along as the
        # request's data so the callbacks can get back to it.
        staging_root = common_utils.get_working_directory()
        pending_requests = [
            request.DownloadRequest(
                iso.url,
                os.path.join(tempfile.mkdtemp(dir=staging_root), os.path.basename(iso.url)),
                iso)
            for iso in manifest
        ]
        self.downloader.download(pending_requests)

    def _download_manifest(self):
        """
        Fetch the manifest file from the feed and parse it into an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError:    if the manifest could not be retrieved
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # The manifest is downloaded into memory rather than onto disk.
        manifest_buffer = StringIO()
        self.downloader.download([request.DownloadRequest(manifest_url, manifest_buffer)])

        progress = self.progress_report
        # download_failed() flips the state to failed when the manifest download goes wrong.
        if progress.state == progress.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_buffer.seek(0)
        try:
            return models.ISOManifest(manifest_buffer, self._repo_url)
        except ValueError:
            progress.error_message = _('The PULP_MANIFEST file was not in the expected format.')
            progress.state = progress.STATE_MANIFEST_FAILED
            raise ValueError(progress.error_message)

    def _associate_unit(self, repo, unit):
        """
        Associate an ISO unit with a repository, first disassociating every cached repository
        unit that carries the same name.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # Load the repository's units once and keep them, to avoid querying mongo on every
            # call.
            # NOTE(review): an empty result is indistinguishable from "not loaded yet", so the
            # query is repeated while the cached list is empty; the cache is also not updated
            # with the units associated below -- confirm this is acceptable.
            found_units = repo_controller.find_repo_content_units(repo, yield_content_unit=True)
            self.repo_units = list(found_units)

        same_name_units = []
        for known_unit in self.repo_units:
            if known_unit['name'] == unit['name']:
                same_name_units.append(known_unit)

        repo_controller.disassociate_units(repo, same_name_units)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        An ISO counts as locally present only if its unit exists in Pulp and its storage path
        points at a file on disk. When downloading is deferred no unit is treated as locally
        present, so every ISO in the manifest is returned in the first element and the second
        element is empty.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        # ISOs in Pulp whose content is on disk, keyed by unit key. The collection is walked at
        # most once, and not at all in deferred mode where the result would be empty anyway.
        existing_units_by_key = {}
        if not download_deferred:
            for unit in models.ISO.objects():
                # A unit without a storage path cannot have content on disk; checking it first
                # also keeps os.path.isfile() from being handed a non-string value.
                if unit.storage_path and os.path.isfile(unit.storage_path):
                    existing_units_by_key[unit.unit_key_str] = unit
        existing_unit_keys = set(existing_units_by_key)

        # Units currently associated with the repository
        existing_repo_units_by_key = dict(
            (unit.unit_key_str, unit)
            for unit in repo_controller.find_repo_content_units(
                self.sync_conduit.repo, yield_content_unit=True))
        existing_repo_unit_keys = set(existing_repo_units_by_key)

        # ISOs offered by the remote repository
        available_isos_by_key = dict((iso.unit_key_str, iso) for iso in manifest)
        available_iso_keys = set(available_isos_by_key)

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = (
            (available_iso_keys & existing_unit_keys) - existing_repo_unit_keys)
        local_available_units = [existing_units_by_key[key] for key in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_isos = [
            available_isos_by_key[key] for key in available_iso_keys - existing_unit_keys
        ]

        # Content that is associated with the repository but no longer offered remotely
        remote_missing_units = [
            existing_repo_units_by_key[key]
            for key in existing_repo_unit_keys - available_iso_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units
Ejemplo n.º 5
0
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is configured
    to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config:   The keyword args used to initialize the Nectar
                             downloader configuration.
    :type download_config:   dict
    :ivar downloader:        The Nectar downloader used to fetch the requests.
    :type downloader:        nectar.downloaders.threaded.HTTPThreadedDownloader
    """

    def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
        """
        Set up a Step that downloads every download request it is given.

        :param step_type:           Passed on to the parent Step as its step_type.
        :param step_description:    Stored as this step's description.
        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(step_type=step_type,
                                                   status_conduit=lazy_status_conduit)
        self.description = step_description
        self.download_requests = download_requests

        # Concurrency comes from the 'lazy' section of the Pulp configuration. Every request
        # carries the Pulp stream request header, and SSL validation is always enabled.
        concurrency = int(pulp_conf.get('lazy', 'download_concurrency'))
        self.download_config = {
            MAX_CONCURRENT: concurrency,
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True
        }

        # This step is registered as the downloader's event listener.
        nectar_config = DownloaderConfig(**self.download_config)
        self.downloader = HTTPThreadedDownloader(nectar_config, self)

    def _process_block(self, item=None):
        """
        Hand all download requests to the downloader.

        Called by the `process` loop. It is overridden because success and failure are decided
        in the EventListener callbacks, which also take care of updating the progress. `item` is
        not needed, so `process_main` is bypassed.

        Inherited from Step.

        :param item: Unused.
        :type  item: None
        """
        pending_requests = self.download_requests
        self.downloader.download(pending_requests)

    def get_total(self):
        """
        Count the download requests, so that progress is reported per file.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype:  int
        """
        request_count = len(self.download_requests)
        return request_count

    def download_started(self, report):
        """
        Called when a download is about to start. The deferred download entry for the unit is
        removed, and the file at the catalog entry's path is validated against the catalog
        checksum; if it validates, the file is counted as a success and an exception is raised
        which will cause Nectar to skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # The download is under way, so the unit no longer needs its deferred entry.
        DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID],
            unit_type_id=report.data[TYPE_ID]
        ).delete()

        try:
            file_info = report.data[UNIT_FILES][report.destination]
            catalog_entry = file_info[CATALOG_ENTRY]
            # Raises one of the exceptions handled below unless a valid file is already there.
            self.validate_file(catalog_entry.path,
                               catalog_entry.checksum_algorithm,
                               catalog_entry.checksum)
            file_info[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            _logger.debug(_('{path} has already been downloaded.').format(
                path=catalog_entry.path))
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # The file is missing or does not validate: let the download proceed.
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        A file that fails validation is flagged as not downloaded and counted as a
        failure. Once every file of the unit is flagged as downloaded, the unit
        itself is marked as downloaded in the database.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport
        """
        # Reload the content unit, fetching only the fields listed below.
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id', '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(
                report.destination,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )

            # Location of the file relative to the unit's storage path.
            relative_path = os.path.relpath(
                catalog_entry.path,
                FileStorage.get_path(content_unit)
            )
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point to the file
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination, location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError) as e:
            # "as" is accepted by Python 2.6+ and Python 3; the comma form is Python 2 only.
            _logger.debug(_('Download of {path} failed: {reason}.').format(
                path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
        self.report_progress()

        # Mark the entire unit as downloaded, if necessary.
        download_flags = [entry[PATH_DOWNLOADED] for entry in
                          report.data[UNIT_FILES].values()]
        if all(download_flags):
            _logger.debug(_('Marking content unit {type}:{id} as downloaded.').format(
                type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
Ejemplo n.º 6
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Capture the sync settings from the config and build the downloader for this run.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT)

        # urljoin is used later to locate PULP_MANIFEST relative to the feed, so the feed URL
        # has to end with a slash.
        feed_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        self._repo_url = feed_url
        if feed_url[-1] != '/':
            self._repo_url = feed_url + '/'

        # Config values are cast to the types the downloader configuration is given.
        configured_speed = config.get(importer_constants.KEY_MAX_SPEED)
        max_speed = None if configured_speed is None else float(configured_speed)
        configured_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if configured_downloads is None:
            max_concurrent = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        else:
            max_concurrent = int(configured_downloads)
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        if ssl_validation is None:
            ssl_validation = constants.CONFIG_VALIDATE_DEFAULT

        downloader_config = DownloaderConfig(
            max_speed=max_speed,
            max_concurrent=max_concurrent,
            ssl_client_cert=config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            ssl_client_key=config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            ssl_ca_cert=config.get(importer_constants.KEY_SSL_CA_CERT),
            ssl_validation=ssl_validation,
            proxy_url=config.get(importer_constants.KEY_PROXY_HOST),
            proxy_port=config.get(importer_constants.KEY_PROXY_PORT),
            proxy_username=config.get(importer_constants.KEY_PROXY_USER),
            proxy_password=config.get(importer_constants.KEY_PROXY_PASS),
            basic_auth_username=config.get(importer_constants.KEY_BASIC_AUTH_USER),
            basic_auth_password=config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            working_dir=common_utils.get_working_directory())

        # This object is handed to the downloader as its event listener so that the download
        # callbacks are delivered to this class.
        if self._repo_url.lower().startswith('file'):
            downloader_class = LocalFileDownloader
        else:
            downloader_class = HTTPThreadedDownloader
        self.downloader = downloader_class(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        # Filled in by _associate_unit() the first time it needs the repo's units.
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Tell whether the configured download policy defers downloading.

        The policy is read from the config, with the immediate policy passed as the second
        argument to config.get(); any other policy value counts as deferred.

        :return: True if deferred.
        :rtype: bool
        """
        configured_policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY, importer_constants.DOWNLOAD_IMMEDIATE)
        is_immediate = configured_policy == importer_constants.DOWNLOAD_IMMEDIATE
        return not is_immediate

    def download_failed(self, report):
        """
        Callback invoked by the downloader library whenever an individual download fails.

        The failure is logged. During the manifest phase the whole phase is marked as failed;
        during the ISO phase the ISO carried by the report is recorded as failed.

        :param report: The report of the download that failed
        :type  report: nectar.report.DownloadReport
        """
        _logger.error(
            _('Failed to download %(url)s: %(error_msg)s.') %
            {'url': report.url, 'error_msg': report.error_msg})

        progress = self.progress_report
        if progress.state == progress.STATE_MANIFEST_IN_PROGRESS:
            # A failure while fetching the manifest fails the manifest phase itself.
            progress.state = progress.STATE_MANIFEST_FAILED
            progress.error_message = report.error_report
        elif progress.state == progress.STATE_ISOS_IN_PROGRESS:
            progress.add_failed_iso(report.data, report.error_report)
        progress.update_progress()

    def download_progress(self, report):
        """
        Callback invoked periodically with the number of bytes downloaded so far for a file.
        While ISOs are being downloaded, the progress report is advanced by the bytes gained
        since the previous notification so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        iso = report.data
        # Only the delta since the last callback is added to the running total.
        progress.finished_bytes += report.bytes_downloaded - iso.bytes_downloaded
        iso.bytes_downloaded = report.bytes_downloaded
        progress.update_progress()

    def download_succeeded(self, report):
        """
        Callback invoked by the downloader library when a file has been downloaded. Outside of
        the ISO downloading stage nothing happens; within it, the downloaded ISO is optionally
        validated, saved, associated with the repository and its content imported.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        # Anything that finishes during the ISO stage must be one of our ISOs.
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        # Account for the bytes downloaded up to completion.
        self.download_progress(report)
        iso = report.data
        downloaded_file = report.destination
        iso.set_storage_path(os.path.basename(downloaded_file))
        try:
            if self._validate_downloads:
                iso.validate_iso(downloaded_file)
            try:
                iso.save()
            except NotUniqueError:
                # A unit with this key already exists; continue with the stored one.
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            self._associate_unit(self.sync_conduit.repo, iso)
            iso.safe_import_content(downloaded_file)

            progress.num_isos_finished += 1
            progress.update_progress()
        except ValueError:
            # A ValueError raised by any step above is reported as a failed download.
            self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Record units in the deferred downloading (lazy) catalog.

        Units without a url attribute are not eligible for the lazy catalog and are skipped.
        A unit with no storage path gets one derived from its name before being recorded.

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Only units that carry a url attribute can be cataloged.
            if hasattr(unit, "url"):
                if not unit.storage_path:
                    unit.set_storage_path(unit.name)

                entry = LazyCatalogEntry()
                entry.path = unit.storage_path
                entry.importer_id = str(self.sync_conduit.importer_object_id)
                entry.unit_id = unit.id
                entry.unit_type_id = unit.type_id
                entry.url = unit.url
                entry.checksum = unit.checksum
                # The ISO model does not define a checksum type but appears to use sha256;
                # once the model includes the checksum type, that field should be used here.
                entry.checksum_algorithm = 'sha256'
                entry.save_revision()

    def perform_sync(self):
        """
        Run the sync according to the config and return the final report. Progress is
        reported through the sync_conduit while the sync runs.

        Steps: download the manifest, split its ISOs against local content, associate units
        that already exist, then either download the missing ISOs or (when downloading is
        deferred) save them as not-downloaded units with catalog entries, and finally remove
        units that vanished from the remote if configured to do so.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        progress = self.progress_report

        # Manifest phase.
        progress.state = progress.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # IOError: the manifest could not be retrieved at all.
            # ValueError: the PULP_MANIFEST file is not in the expected format.
            return progress.build_final_report()

        # Work out what must be fetched, what only needs associating, and what is stale.
        local_missing_isos, local_available_isos, remote_missing_isos = \
            self._filter_missing_isos(manifest, self.download_deferred)

        # Units already in Pulp just need to be associated with the repository.
        if local_available_isos:
            unit_keys = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO._content_type_id.default, unit_keys)

        # Deferred downloading (lazy) catalog entries for the units we already have.
        self.add_catalog_entries(local_available_isos)

        progress.state = progress.STATE_ISOS_IN_PROGRESS

        if not self.download_deferred:
            self._download_isos(local_missing_isos)
        else:
            # Deferred: create the units without content and catalog where to get it later.
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)

        # Drop units (and their catalog entries) that are no longer offered by the remote.
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for stale_unit in remote_missing_isos:
                LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=stale_unit.id,
                    unit_type_id=stale_unit.type_id).delete()

        # Mark the run finished. According to the original authors' note, the progress
        # report's _set_state() turns this into a failed state when errors were collected
        # (that logic is not visible here).
        progress.state = progress.STATE_COMPLETE
        return progress.build_final_report()

    def _download_isos(self, manifest):
        """
        Download every ISO in the given collection into its own temporary directory under the
        working directory. Recording the results happens in the download callbacks.

        :param manifest: The ISOs we want to download; it is iterated twice and must support
                         len() (perform_sync passes the list of locally missing ISOs).
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        progress = self.progress_report
        progress.total_bytes = 0
        progress.num_isos = len(manifest)
        # Reset each ISO's byte counter and add its size to the expected total.
        for iso in manifest:
            iso.bytes_downloaded = 0
            progress.total_bytes += iso.size
        progress.update_progress()

        # One DownloadRequest per ISO, each targeting a fresh temporary directory.
        working_directory = common_utils.get_working_directory()
        pending_requests = []
        for iso in manifest:
            destination = os.path.join(
                tempfile.mkdtemp(dir=working_directory), os.path.basename(iso.url))
            pending_requests.append(request.DownloadRequest(iso.url, destination, iso))
        self.downloader.download(pending_requests)

    def _download_manifest(self):
        """
        Fetch the manifest file into memory and parse it into an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError:    if the download left the progress report in the manifest-failed
                            state
        :raises ValueError: if the downloaded file cannot be parsed as a manifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # The manifest is downloaded into an in-memory buffer rather than a file.
        manifest_buffer = StringIO()
        self.downloader.download([request.DownloadRequest(manifest_url, manifest_buffer)])

        progress = self.progress_report
        # download_failed() flags the report when the manifest download goes wrong.
        if progress.state == progress.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_buffer.seek(0)
        try:
            return models.ISOManifest(manifest_buffer, self._repo_url)
        except ValueError:
            progress.error_message = _('The PULP_MANIFEST file was not in the expected format.')
            progress.state = progress.STATE_MANIFEST_FAILED
            raise ValueError(progress.error_message)

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the same
        name and if so, remove them.

        The repository's units are fetched once and cached on self.repo_units so that mongo is
        not queried for every unit. While the cache is empty (including when the repository
        has no units) the query is repeated on the next call.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # The query result must be materialized before it is cached: the sibling
            # _filter_missing_isos() has to wrap this same call in list(), which indicates a
            # one-shot iterable. Caching that directly would leave an always-truthy object
            # that is exhausted after the first scan, so later calls would never find
            # same-named units to remove.
            self.repo_units = list(
                repo_controller.find_repo_content_units(repo, yield_content_unit=True))

        unit_name = unit['name']
        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit_name]

        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        A unit counts as locally available only when downloading is not deferred and its
        storage path is an existing file. When download_deferred is true no unit is treated as
        locally available, so every ISO in the manifest is returned in the first list.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        # ISOs already in Pulp whose content is on disk, keyed by unit key. The collection is
        # walked once, and not at all when downloading is deferred, because in that case
        # every unit would be filtered out anyway.
        existing_units_by_key = {}
        if not download_deferred:
            for unit in models.ISO.objects():
                if os.path.isfile(unit.storage_path):
                    existing_units_by_key[unit.unit_key_str] = unit
        existing_unit_keys = set(existing_units_by_key)

        # Units currently associated with the repository, keyed by unit key.
        existing_repo_units_by_key = dict(
            (unit.unit_key_str, unit)
            for unit in repo_controller.find_repo_content_units(
                self.sync_conduit.repo, yield_content_unit=True))
        existing_repo_unit_keys = set(existing_repo_units_by_key)

        # ISOs offered by the remote repository, keyed by unit key.
        available_isos_by_key = dict((iso.unit_key_str, iso) for iso in manifest)
        available_iso_keys = set(available_isos_by_key)

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(key for key in available_iso_keys
                                       if key in existing_unit_keys)
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_isos = [available_isos_by_key[k]
                              for k in available_iso_keys - existing_unit_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_units = [existing_repo_units_by_key[k]
                                for k in existing_repo_unit_keys - available_iso_keys]

        return local_missing_isos, local_available_units, remote_missing_units
Ejemplo n.º 7
0
class DownloadStep(PluginStep, listener.DownloadEventListener):
    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Initialize the parent step and remember the downloads this step will perform.

        :param step_type: The id of the step this processes
        :type  step_type: str
        :param downloads: A list of DownloadRequests
        :type  downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type  repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type  config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type  working_dir: str
        :param plugin_type: The type of the plugin
        :type  plugin_type: str
        :param description: The text description that will be displayed to users
        :type  description: basestring
        """
        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit, config=config,
                                           working_dir=working_dir, plugin_type=plugin_type)
        # Each instance gets its own list when no downloads are supplied.
        self._downloads = [] if downloads is None else downloads
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Prepare the nectar downloader from the step's configuration.

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)

        # urljoin is used later to build paths from the feed, so the feed URL has to end with
        # a slash.
        feed_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        self._repo_url = feed_url
        if feed_url[-1] != '/':
            self._repo_url = feed_url + '/'

        nectar_config = importer_config_to_nectar_config(config.flatten())

        # This object is handed to the downloader as its event listener so that the download
        # callbacks are delivered to this class.
        if self._repo_url.lower().startswith('file'):
            downloader_class = LocalFileDownloader
        else:
            downloader_class = HTTPThreadedDownloader
        self.downloader = downloader_class(nectar_config, self)

    @property
    def downloads(self):
        """
        The download requests for this step, as a list.

        The class may be instantiated with "downloads" as a generator (or other non-list
        iterable), since the required downloads are probably not known at instantiation time.
        On first access such a value is converted to a list, which is stored and returned from
        then on.

        :return:    list of download requests (nectar.request.DownloadRequest)
        :rtype:     list
        """
        pending = self._downloads
        if isinstance(pending, list):
            return pending
        self._downloads = list(pending)
        return self._downloads

    def get_total(self):
        """
        Count the items this step will download.

        :returns: number of DownloadRequests
        :rtype: int
        """
        pending_requests = self.downloads
        return len(pending_requests)

    def _process_block(self):
        """
        Do the step's work: hand every download request to the downloader.
        """
        start_downloads = self.downloader.download
        start_downloads(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        Callback invoked by the downloader library each time an individual download succeeds.
        Counts the success and reports progress.

        :param report: report passed in by the downloader library (currently not used);
                       presumably a nectar.report.DownloadReport
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_successes = self.progress_successes + 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        Callback invoked by the downloader library each time an individual download fails.
        Counts the failure and reports progress.

        :param report: report passed in by the downloader library (currently not used);
                       presumably a nectar.report.DownloadReport
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_failures = self.progress_failures + 1
        self.report_progress()

    def cancel(self):
        """
        Cancel this step: first through the parent class, then by cancelling the downloader.
        """
        super(DownloadStep, self).cancel()
        active_downloader = self.downloader
        active_downloader.cancel()
Ejemplo n.º 8
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep the state so
    that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself
    to the downloader library and receive the callbacks when downloads are complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Capture the sync settings from the config and build the downloader for this run.

        :param sync_conduit: the sync conduit to use for this sync run
        :param config:       plugin configuration the settings are read from
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT)

        # urljoin is used later to locate PULP_MANIFEST relative to the feed, so the feed URL
        # has to end with a slash.
        feed_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        self._repo_url = feed_url
        if feed_url[-1] != '/':
            self._repo_url = feed_url + '/'

        # Config values are cast to the types the downloader configuration is given.
        configured_speed = config.get(importer_constants.KEY_MAX_SPEED)
        max_speed = None if configured_speed is None else float(configured_speed)
        configured_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if configured_downloads is None:
            max_concurrent = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        else:
            max_concurrent = int(configured_downloads)
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        if ssl_validation is None:
            ssl_validation = constants.CONFIG_VALIDATE_DEFAULT

        downloader_config = DownloaderConfig(
            max_speed=max_speed,
            max_concurrent=max_concurrent,
            ssl_client_cert=config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            ssl_client_key=config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            ssl_ca_cert=config.get(importer_constants.KEY_SSL_CA_CERT),
            ssl_validation=ssl_validation,
            proxy_url=config.get(importer_constants.KEY_PROXY_HOST),
            proxy_port=config.get(importer_constants.KEY_PROXY_PORT),
            proxy_username=config.get(importer_constants.KEY_PROXY_USER),
            proxy_password=config.get(importer_constants.KEY_PROXY_PASS))

        # This object is handed to the downloader as its event listener so that the download
        # callbacks are delivered to this class.
        if self._repo_url.lower().startswith('file'):
            downloader_class = LocalFileDownloader
        else:
            downloader_class = HTTPThreadedDownloader
        self.downloader = downloader_class(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        Cancel a sync that is in progress.
        """
        # The progress report is flagged as cancelled first, then the downloader is asked to
        # cancel its work.
        progress = self.progress_report
        progress.state = progress.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        Callback invoked by the downloader library whenever an individual download fails.

        During the manifest phase the whole phase is marked as failed; during the ISO phase
        the ISO carried by the report is recorded as failed.

        :param report: The report of the download that failed
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state == progress.STATE_MANIFEST_IN_PROGRESS:
            # A failure while fetching the manifest fails the manifest phase itself.
            progress.state = progress.STATE_MANIFEST_FAILED
            progress.error_message = report.error_report
        elif progress.state == progress.STATE_ISOS_IN_PROGRESS:
            progress.add_failed_iso(report.data, report.error_report)
        progress.update_progress()

    def download_progress(self, report):
        """
        Callback invoked periodically with the number of bytes downloaded so far for a file.
        While ISOs are being downloaded, the progress report is advanced by the bytes gained
        since the previous notification so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        iso = report.data
        # Only the delta since the last callback is added to the running total.
        progress.finished_bytes += report.bytes_downloaded - iso.bytes_downloaded
        iso.bytes_downloaded = report.bytes_downloaded
        progress.update_progress()

    def download_succeeded(self, report):
        """
        Callback invoked by the downloader library when a file has been downloaded. Outside of
        the ISO downloading stage nothing happens; within it, the downloaded ISO is optionally
        validated and then saved through the sync conduit.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        # Anything that finishes during the ISO stage must be one of our ISOs.
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        # Account for the bytes downloaded up to completion.
        self.download_progress(report)
        iso = report.data
        try:
            if self._validate_downloads:
                iso.validate()
            iso.save_unit(self.sync_conduit)
            progress.num_isos_finished += 1
            progress.update_progress()
        except ValueError:
            # A ValueError raised by any step above is reported as a failed download.
            self.download_failed(report)

    def perform_sync(self):
        """
        Run the sync according to the config and return the final report. Progress is
        reported through the sync_conduit while the sync runs.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        progress = self.progress_report

        # Manifest phase.
        progress.state = progress.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # IOError: the manifest could not be retrieved at all.
            # ValueError: the PULP_MANIFEST file is not in the expected format.
            return progress.build_final_report()

        # ISO phase: download what is missing locally, then optionally remove what is gone
        # from the remote.
        progress.state = progress.STATE_ISOS_IN_PROGRESS
        missing_locally, missing_remotely = self._filter_missing_isos(manifest)
        self._download_isos(missing_locally)
        if self._remove_missing_units:
            self._remove_units(missing_remotely)

        # Mark the run finished. According to the original authors' note, the progress
        # report's _set_state() turns this into a failed state when errors were collected
        # (that logic is not visible here).
        progress.state = progress.STATE_COMPLETE
        return progress.build_final_report()

    def _download_isos(self, manifest):
        """
        Download every ISO in the given collection to its storage path. Recording the results
        happens in the download callbacks.

        :param manifest: The ISOs we want to download; it is iterated twice and must support
                         len().
        :type  manifest: list
        """
        progress = self.progress_report
        progress.total_bytes = 0
        progress.num_isos = len(manifest)
        # Initialize the unit behind each ISO, reset its byte counter and add its size to the
        # expected total.
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            progress.total_bytes += iso.size
        progress.update_progress()

        # One DownloadRequest per ISO, targeting the ISO's storage path.
        pending_requests = []
        for iso in manifest:
            pending_requests.append(request.DownloadRequest(iso.url, iso.storage_path, iso))
        self.downloader.download(pending_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.common.models.ISOManifest
        :raises IOError:    if the download left the progress report in the manifest-failed
                            state
        :raises ValueError: if the downloaded file cannot be parsed as a manifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # The manifest is downloaded into an in-memory buffer rather than a file.
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # download_failed() flags the report when the manifest download goes wrong.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            # The caught exception object is not needed; a new ValueError carrying the
            # user-facing message is raised instead.
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest
Ejemplo n.º 9
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Capture the settings for a single sync run and build the downloader that will serve it.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT)

        # urljoin is used later to locate PULP_MANIFEST relative to the feed, and it only treats
        # the whole feed as a directory when the feed ends in a slash
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        if self._repo_url[-1] != '/':
            self._repo_url += '/'

        # Cast the numeric settings to the types used to build the downloader configuration
        speed_limit = config.get(importer_constants.KEY_MAX_SPEED)
        if speed_limit is not None:
            speed_limit = float(speed_limit)
        concurrency = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if concurrency is None:
            concurrency = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        else:
            concurrency = int(concurrency)
        verify_ssl = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        if verify_ssl is None:
            verify_ssl = constants.CONFIG_VALIDATE_DEFAULT

        downloader_config = DownloaderConfig(
            max_speed=speed_limit,
            max_concurrent=concurrency,
            ssl_client_cert=config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            ssl_client_key=config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            ssl_ca_cert=config.get(importer_constants.KEY_SSL_CA_CERT),
            ssl_validation=verify_ssl,
            proxy_url=config.get(importer_constants.KEY_PROXY_HOST),
            proxy_port=config.get(importer_constants.KEY_PROXY_PORT),
            proxy_username=config.get(importer_constants.KEY_PROXY_USER),
            proxy_password=config.get(importer_constants.KEY_PROXY_PASS))

        # This object is handed to the downloader as its event listener, so the download_*
        # callbacks of this class receive the download events
        if self._repo_url.lower().startswith('file'):
            downloader_class = LocalFileDownloader
        else:
            downloader_class = HTTPThreadedDownloader
        self.downloader = downloader_class(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        Cancel a sync that is in progress.

        The progress report is moved to the cancelled state first, and then the downloader is
        asked to cancel its downloads.
        """
        progress = self.progress_report
        progress.state = progress.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        Callback invoked by the downloader library whenever an individual download fails.

        The failure is always logged. If the manifest was being retrieved, the progress report
        is moved to the failed manifest state and given the error report. If ISOs were being
        retrieved, the ISO attached to the report is recorded as failed. In every case the
        progress report is then asked to publish its progress.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        _logger.error(_('Failed to download %(url)s: %(error_msg)s.') % {
            'url': report.url, 'error_msg': report.error_msg})

        progress = self.progress_report
        current_state = progress.state
        if current_state == progress.STATE_MANIFEST_IN_PROGRESS:
            # A failure during the manifest phase fails that whole phase
            progress.state = progress.STATE_MANIFEST_FAILED
            progress.error_message = report.error_report
        elif current_state == progress.STATE_ISOS_IN_PROGRESS:
            progress.add_failed_iso(report.data, report.error_report)
        progress.update_progress()

    def download_progress(self, report):
        """
        Callback invoked from time to time with the number of bytes downloaded so far for a file.

        While ISOs are being downloaded, the bytes gained since the previous notification for
        the same ISO are added to the progress report so the client can follow the progress.
        Notifications received in any other state are ignored.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        iso = report.data
        downloaded_so_far = report.bytes_downloaded
        # The report carries a running total, so only the delta is added to the finished bytes
        progress.finished_bytes += downloaded_so_far - iso.bytes_downloaded
        iso.bytes_downloaded = downloaded_so_far
        progress.update_progress()

    def download_succeeded(self, report):
        """
        Callback invoked by the downloader library when a file has been downloaded successfully.

        Outside of the ISO downloading stage nothing is done. During that stage the byte counts
        are brought up to date, the ISO is validated (when validation is enabled) and saved
        through the sync conduit, and the count of finished ISOs is incremented. A ValueError
        raised along the way causes the download to be handled as a failed download instead.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        progress = self.progress_report
        if progress.state != progress.STATE_ISOS_IN_PROGRESS:
            return

        # In the ISO stage every finished download is one of our ISOs; account for its bytes
        self.download_progress(report)
        iso = report.data
        try:
            if self._validate_downloads:
                iso.validate()
            iso.save_unit(self.sync_conduit)
            progress.num_isos_finished += 1
            progress.update_progress()
        except ValueError:
            self.download_failed(report)

    def perform_sync(self):
        """
        Run the whole sync and return the final report. Progress is published through the
        progress report as the sync advances.

        The manifest is retrieved first. Units already in Pulp are then associated with the
        repository, missing ISOs are downloaded, and, when configured, units that are no longer
        in the remote repository are removed.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        progress = self.progress_report

        progress.state = progress.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # IOError: the manifest could not be retrieved at all. ValueError: the PULP_MANIFEST
            # file was not in the expected format. Either way the sync ends here.
            return progress.build_final_report()

        # Work out what must be downloaded, what is already in Pulp, and what vanished remotely
        to_download, to_associate, to_remove = self._filter_missing_isos(manifest)

        if to_associate:
            unit_keys = [unit.unit_key for unit in to_associate]
            self.sync_conduit.associate_existing(models.ISO.TYPE, unit_keys)

        progress.state = progress.STATE_ISOS_IN_PROGRESS
        self._download_isos(to_download)
        if self._remove_missing_units:
            self._remove_units(to_remove)

        # NOTE(review): the progress report's state setter is said to turn this final state into
        # a failed state when errors were collected - confirm against its _set_state() method.
        progress.state = progress.STATE_COMPLETE
        return progress.build_final_report()

    def _download_isos(self, manifest):
        """
        Ask the downloader to retrieve every ISO in the manifest to its storage path, after
        resetting the ISO count and byte totals on the progress report.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        progress = self.progress_report
        progress.total_bytes = 0
        progress.num_isos = len(manifest)

        # Initialize the Unit that represents each ISO, and add the ISO's size to the number of
        # bytes that this sync is expected to download
        for pending_iso in manifest:
            pending_iso.init_unit(self.sync_conduit)
            pending_iso.bytes_downloaded = 0
            progress.total_bytes += pending_iso.size
        progress.update_progress()

        # Each request carries its ISO as the request data, so that the download callbacks can
        # get back to the ISO from the download report
        download_requests = []
        for pending_iso in manifest:
            download_requests.append(
                request.DownloadRequest(pending_iso.url, pending_iso.storage_path, pending_iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Retrieve the manifest file from the repository URL and parse it into an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError:    if the progress report is in the failed manifest state once the
                            download call has returned
        :raises ValueError: if the manifest cannot be parsed; the progress report is moved to
                            the failed manifest state before the error is raised
        """
        url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # The manifest is downloaded into memory rather than onto disk
        buf = StringIO()
        self.downloader.download([request.DownloadRequest(url, buf)])

        progress = self.progress_report
        # The report's state tells us whether retrieving the manifest ended in an error
        if progress.state == progress.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': url})

        # Rewind, so that parsing starts at the beginning of what was downloaded
        buf.seek(0)
        try:
            return models.ISOManifest(buf, self._repo_url)
        except ValueError:
            progress.error_message = _('The PULP_MANIFEST file was not in the '
                                       'expected format.')
            progress.state = progress.STATE_MANIFEST_FAILED
            raise ValueError(progress.error_message)

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        ISOs and units are matched on a string built from their name, checksum and size. The
        order of the elements inside each returned list is unspecified, because the lists are
        derived from set operations.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        def _units_by_key(units):
            """
            Return a dict that maps the unit key string of each unit to the unit itself.

            The units are iterated exactly once, so this also works when they are handed over
            as a one-shot iterator.

            :param units: The units to index
            :type  units: iterable of pulp.plugins.model.Unit
            """
            return dict((_unit_key_str(models.ISO.from_unit(unit)), unit) for unit in units)

        # All the ISOs we have in Pulp. The key sets are taken from the dicts rather than being
        # computed in a second pass over the units.
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units_by_key = _units_by_key(
            self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria))
        existing_unit_keys = set(existing_units_by_key)

        # The units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units_by_key = _units_by_key(self.sync_conduit.get_units(search_criteria))
        existing_repo_unit_keys = set(existing_repo_units_by_key)

        # The ISOs in the remote repository
        available_isos_by_key = dict((_unit_key_str(iso), iso) for iso in manifest)
        available_iso_keys = set(available_isos_by_key)

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = \
            (available_iso_keys & existing_unit_keys) - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = available_iso_keys - existing_unit_keys
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = existing_repo_unit_keys - available_iso_keys
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)