Example #1
0
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert':
            config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key':
            config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
Example #2
0
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        self.repo_units = []
Example #3
0
    def __init__(self, sync_conduit, config):
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(constants.CONFIG_REMOVE_MISSING_UNITS, default=False)
        self._repo_url = encode_unicode(config.get(constants.CONFIG_FEED_URL))
        self._validate_downloads = config.get(constants.CONFIG_VALIDATE_DOWNLOADS, default=True)

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(constants.CONFIG_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        num_threads = config.get(constants.CONFIG_NUM_THREADS)
        if num_threads is not None:
            num_threads = int(num_threads)
        else:
            num_threads = constants.DEFAULT_NUM_THREADS
        downloader_config = {
            'max_speed': max_speed, 'num_threads': num_threads,
            'ssl_client_cert': config.get(constants.CONFIG_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(constants.CONFIG_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(constants.CONFIG_SSL_CA_CERT), 'ssl_verify_host': 1,
            'ssl_verify_peer': 1, 'proxy_url': config.get(constants.CONFIG_PROXY_URL),
            'proxy_port': config.get(constants.CONFIG_PROXY_PORT),
            'proxy_user': config.get(constants.CONFIG_PROXY_USER),
            'proxy_password': config.get(constants.CONFIG_PROXY_PASSWORD)}
        downloader_config = DownloaderConfig(protocol='https', **downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this class
        self.downloader = factory.get_downloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
Example #4
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does
        # not support it
        # and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for
                             iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """

        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                      for unit in existing_units])
        existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                  for unit in existing_units])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
Example #5
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed':
            max_speed,
            'max_concurrent':
            max_downloads,
            'ssl_client_cert':
            config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key':
            config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert':
            config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation':
            ssl_validation,
            'proxy_url':
            config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port':
            config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username':
            config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password':
            config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username':
            config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password':
            config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir':
            common_utils.get_working_directory()
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(importer_constants.DOWNLOAD_POLICY,
                                 importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest,
                                                  self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            for iso in local_available_isos:
                self._associate_unit(self.sync_conduit.repo, iso)

        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo,
                                               remote_missing_isos)
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(importer_id=str(
                    self.sync_conduit.importer_object_id),
                                                     unit_id=unit.id,
                                                     unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(
                request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url,
                                                   manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' + 'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the same
        name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = list(
                repo_controller.find_repo_content_units(
                    repo, yield_content_unit=True))

        units_to_remove = [
            iso for iso in self.repo_units if iso['name'] == unit['name']
        ]

        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        # A list of all the ISOs we have in Pulp
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([
            (unit.unit_key_str, unit) for unit in existing_units
            if not download_deferred and os.path.isfile(unit.storage_path)
        ])
        existing_units.rewind()
        existing_unit_keys = set([
            unit.unit_key_str for unit in existing_units
            if not download_deferred and os.path.isfile(unit.storage_path)
        ])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set(
            [unit.unit_key_str for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso)
                                      for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys -
                                        available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units
Example #6
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY,
            importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO._content_type_id.default, search_dicts)

        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the same
        name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = repo_controller.find_repo_content_units(repo, yield_content_unit=True)

        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]

        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        # A list of all the ISOs we have in Pulp
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([(unit.unit_key_str, unit)
                                      for unit in existing_units if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str
                                  for unit in existing_units if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
Example #7
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep the state so
    that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself
    to the downloader library and receive the callbacks when downloads are complete.
    """
    def __init__(self, sync_conduit, config):
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(constants.CONFIG_REMOVE_MISSING_UNITS, default=False)
        self._repo_url = encode_unicode(config.get(constants.CONFIG_FEED_URL))
        self._validate_downloads = config.get(constants.CONFIG_VALIDATE_DOWNLOADS, default=True)

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(constants.CONFIG_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        num_threads = config.get(constants.CONFIG_NUM_THREADS)
        if num_threads is not None:
            num_threads = int(num_threads)
        else:
            num_threads = constants.DEFAULT_NUM_THREADS
        downloader_config = {
            'max_speed': max_speed, 'num_threads': num_threads,
            'ssl_client_cert': config.get(constants.CONFIG_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(constants.CONFIG_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(constants.CONFIG_SSL_CA_CERT), 'ssl_verify_host': 1,
            'ssl_verify_peer': 1, 'proxy_url': config.get(constants.CONFIG_PROXY_URL),
            'proxy_port': config.get(constants.CONFIG_PROXY_PORT),
            'proxy_user': config.get(constants.CONFIG_PROXY_USER),
            'proxy_password': config.get(constants.CONFIG_PROXY_PASSWORD)}
        downloader_config = DownloaderConfig(protocol='https', **downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this class
        self.downloader = factory.get_downloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does not support it
        # and so for now we will just pass
        pass

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to failed for that
        # phase.
        if self.progress_report.manifest_state == STATE_RUNNING:
            self.progress_report.manifest_state = STATE_FAILED
        elif self.progress_report.isos_state == STATE_RUNNING:
            iso = self._url_iso_map[report.url]
            self.progress_report.add_failed_iso(iso, report.error_report)
            del self._url_iso_map[report.url]
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: pulp.common.download.report.DownloadReport
        """
        if self.progress_report.isos_state == STATE_RUNNING:
            iso = self._url_iso_map[report.url]
            additional_bytes_downloaded = report.bytes_downloaded - iso['bytes_downloaded']
            self.progress_report.isos_finished_bytes += additional_bytes_downloaded
            iso['bytes_downloaded'] = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in downloading a
        file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add
        the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: pulp.common.download.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.isos_state == STATE_RUNNING:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = self._url_iso_map[report.url]
            try:
                if self._validate_downloads:
                    self._validate_download(iso)
                self.sync_conduit.save_unit(iso['unit'])
                # We can drop this ISO from the url --> ISO map
                self.progress_report.isos_finished_count += 1
                self.progress_report.update_progress()
                del self._url_iso_map[report.url]
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation accoring to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.manifest_state = STATE_RUNNING
        self.progress_report.update_progress()
        manifest = self._download_manifest()
        self.progress_report.manifest_state = STATE_COMPLETE

        # Go get them filez
        self.progress_report.isos_state = STATE_RUNNING
        self.progress_report.update_progress()
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished
        self.progress_report.isos_state = STATE_COMPLETE
        self.progress_report.update_progress()
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and recording them in the
        Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download. It is a list of
                         dictionaries with at least the following keys: name, checksum, size, and url.
        :type  manifest: list
        """
        self.progress_report.isos_total_bytes = 0
        self.progress_report.isos_total_count = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want it to be stored,
        # and initialize the Unit that will represent it
        for iso in manifest:
            unit_key = {'name': iso['name'], 'size': iso['size'], 'checksum': iso['checksum']}
            metadata = {}
            relative_path = os.path.join(unit_key['name'], unit_key['checksum'],
                                         str(unit_key['size']), unit_key['name'])
            unit = self.sync_conduit.init_unit(ids.TYPE_ID_ISO, unit_key, metadata, relative_path)
            iso['destination'] = unit.storage_path
            iso['unit'] = unit
            iso['bytes_downloaded'] = 0
            # Set the total bytes onto the report
            self.progress_report.isos_total_bytes += iso['size']
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso['url'], iso['destination']) for iso in manifest]
        # Let's build an index from URL to the manifest unit dictionary, so that we can access data like the
        # name, checksum, and size as we process completed downloads
        self._url_iso_map = dict([(iso['url'], iso) for iso in manifest])
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return a list of the available Units. The available
        units will be a list of dictionaries that describe the available ISOs, with these keys: name,
        checksum, size, and url.

        :return:     list of available ISOs
        :rtype:      list
        """
        manifest_url = urljoin(self._repo_url, constants.ISO_MANIFEST_FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.manifest_state == STATE_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        # Now let's process the manifest and return a list of resources that we'd like to download
        manifest_destiny.seek(0)
        manifest_csv = csv.reader(manifest_destiny)
        manifest = []
        for unit in manifest_csv:
            name, checksum, size = unit
            resource = {'name': name, 'checksum': checksum, 'size': int(size),
                        'url': urljoin(self._repo_url, name)}
            manifest.append(resource)
        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not available
        at the feed_url. Return a 2-tuple with this information. The first element of the tuple will be a
        subset of the given manifest that represents the missing ISOs. The second element will be a list of
        units that represent the ISOs we have in our local store that weren't found at the feed_url. The
        manifest is a list of dictionaries that must contain at a minimum the following keys: name, checksum,
        size.

        :param manifest:     A list of dictionaries that describe the ISOs that are available at the
                             feed_url that we are synchronizing with
        :type  manifest:     list
        :return:             A 2-tuple. The first element of the tuple is a list of dictionaries that describe
                             the ISOs that we should retrieve from the feed_url. These dictionaries are in the
                             same format as they were in the manifest. The second element of the tuple is a
                             list of units that represent the ISOs that we have in our local repo that were
                             not found in the remote repo.
        :rtype:              tuple
        """
        def _unit_key_str(unit_key_dict):
            return '%s-%s-%s' % (unit_key_dict['name'], unit_key_dict['checksum'],
                                 unit_key_dict['size'])

        module_criteria = UnitAssociationCriteria(type_ids=[ids.TYPE_ID_ISO])
        existing_units = self.sync_conduit.get_units(criteria=module_criteria)

        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        existing_units_by_key = dict([(_unit_key_str(unit.unit_key), unit) for unit in existing_units])

        existing_unit_keys = set([_unit_key_str(unit.unit_key) for unit in existing_units])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]
        remote_missing_unit_keys = list(existing_unit_keys - available_iso_keys)
        remote_missing_units = [existing_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)

    def _validate_download(self, iso):
        """
        Validate the size and the checksum of the given downloaded iso. iso should be a dictionary with at
        least these keys: name, checksum, size, and destination.

        :param iso: A dictionary describing the ISO file we want to validate
        :type  iso: dict
        """
        with open(iso['destination']) as destination_file:
            # Validate the size, if we know what it should be
            if 'size' in iso:
                # seek to the end to find the file size with tell()
                destination_file.seek(0, 2)
                size = destination_file.tell()
                if size != iso['size']:
                    raise ValueError(_('Downloading <%(name)s> failed validation. '
                        'The manifest specified that the file should be %(expected)s bytes, but '
                        'the downloaded file is %(found)s bytes.') % {'name': iso['name'],
                            'expected': iso['size'], 'found': size})

            # Validate the checksum, if we know what it should be
            if 'checksum' in iso:
                destination_file.seek(0)
                hasher = hashlib.sha256()
                bits = destination_file.read(VALIDATION_CHUNK_SIZE)
                while bits:
                    hasher.update(bits)
                    bits = destination_file.read(VALIDATION_CHUNK_SIZE)
                # Verify that, son!
                if hasher.hexdigest() != iso['checksum']:
                    raise ValueError(
                        _('Downloading <%(name)s> failed checksum validation. The manifest '
                          'specified the checksum to be %(c)s, but it was %(f)s.') % {
                            'name': iso['name'], 'c': iso['checksum'],
                            'f': hasher.hexdigest()})
Example #8
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep the state so
    that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself
    to the downloader library and receive the callbacks when downloads are complete.
    """
    def __init__(self, sync_conduit, config):
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(importer_constants.KEY_UNITS_REMOVE_MISSING,
                                                default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does not support it
        # and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in downloading a
        file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add
        the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and recording them in the
        Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: list
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want it to be stored,
        # and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.common.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError, e:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +\
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest
Example #9
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert':
            config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key':
            config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does
        # not support it
        # and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [
            request.DownloadRequest(iso.url, iso.storage_path, iso)
            for iso in manifest
        ]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url,
                                                   manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' + 'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(
            models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_units
        ])
        existing_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_units
        ])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_repo_units
        ])
        existing_repo_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_repo_units
        ])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso)
                                      for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys -
                                        available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)