class ISOSyncRun(listener.DownloadEventListener): """ This class maintains state for a single repository sync (do not reuse it). We need to keep the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself to the downloader library and receive the callbacks when downloads are complete. """ def __init__(self, sync_conduit, config): """ Initialize an ISOSyncRun. :param sync_conduit: the sync conduit to use for this sync run. :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: plugin configuration :type config: pulp.plugins.config.PluginCallConfiguration """ self.sync_conduit = sync_conduit self._remove_missing_units = config.get( importer_constants.KEY_UNITS_REMOVE_MISSING, default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT) self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT) self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED)) # The _repo_url must end in a trailing slash, because we will use urljoin to determine # the path to # PULP_MANIFEST later if self._repo_url[-1] != '/': self._repo_url = self._repo_url + '/' # Cast our config parameters to the correct types and use them to build a Downloader max_speed = config.get(importer_constants.KEY_MAX_SPEED) if max_speed is not None: max_speed = float(max_speed) max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS) if max_downloads is not None: max_downloads = int(max_downloads) else: max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION) ssl_validation = ssl_validation if ssl_validation is not None else \ constants.CONFIG_VALIDATE_DEFAULT downloader_config = { 'max_speed': max_speed, 'max_concurrent': max_downloads, 'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT), 'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY), 'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT), 'ssl_validation': ssl_validation, 'proxy_url': config.get(importer_constants.KEY_PROXY_HOST), 'proxy_port': config.get(importer_constants.KEY_PROXY_PORT), 'proxy_username': config.get(importer_constants.KEY_PROXY_USER), 'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)} downloader_config = DownloaderConfig(**downloader_config) # We will pass self as the event_listener, so that we can receive the callbacks in this # class if self._repo_url.lower().startswith('file'): self.downloader = LocalFileDownloader(downloader_config, self) else: self.downloader = HTTPThreadedDownloader(downloader_config, self) self.progress_report = SyncProgressReport(sync_conduit) def cancel_sync(self): """ This method will cancel a sync that is in progress. """ # We used to support sync cancellation, but the current downloader implementation does # not support it # and so for now we will just pass self.progress_report.state = self.progress_report.STATE_CANCELLED self.downloader.cancel() def download_failed(self, report): """ This is the callback that we will get from the downloader library when any individual download fails. """ # If we have a download failure during the manifest phase, we should set the report to # failed for that phase. msg = _('Failed to download %(url)s: %(error_msg)s.') msg = msg % {'url': report.url, 'error_msg': report.error_msg} logger.error(msg) if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS: self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED self.progress_report.error_message = report.error_report elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data self.progress_report.add_failed_iso(iso, report.error_report) self.progress_report.update_progress() def download_progress(self, report): """ We will get notified from time to time about some bytes we've downloaded. We can update our progress report with this information so the client can see the progress. :param report: The report of the file we are downloading :type report: nectar.report.DownloadReport """ if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded self.progress_report.finished_bytes += additional_bytes_downloaded iso.bytes_downloaded = report.bytes_downloaded self.progress_report.update_progress() def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when it succeeds in downloading a file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add the new ISO to the database. :param report: The report of the file we downloaded :type report: nectar.report.DownloadReport """ # If we are in the isos stage, then this must be one of our ISOs. if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: # This will update our bytes downloaded self.download_progress(report) iso = report.data try: if self._validate_downloads: iso.validate() iso.save_unit(self.sync_conduit) # We can drop this ISO from the url --> ISO map self.progress_report.num_isos_finished += 1 self.progress_report.update_progress() except ValueError: self.download_failed(report) def perform_sync(self): """ Perform the sync operation according to the config, and return a report. The sync progress will be reported through the sync_conduit. :return: The sync report :rtype: pulp.plugins.model.SyncReport """ # Get the manifest and download the ISOs that we are missing self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS try: manifest = self._download_manifest() except (IOError, ValueError): # The IOError will happen if the file can't be retrieved at all, and the ValueError will # happen if the PULP_MANIFEST file isn't in the expected format. return self.progress_report.build_final_report() # Discover what files we need to download and what we already have filtered_isos = self._filter_missing_isos(manifest) local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos # Associate units that are already in Pulp if local_available_isos: search_dicts = [unit.unit_key for unit in local_available_isos] self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts) # Go get them filez self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS self._download_isos(local_missing_isos) if self._remove_missing_units: self._remove_units(remote_missing_isos) # Report that we are finished. Note that setting the # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the # progress report has collected any errors. See the progress_report's _set_state() method # for the implementation of this logic. self.progress_report.state = self.progress_report.STATE_COMPLETE report = self.progress_report.build_final_report() return report def _download_isos(self, manifest): """ Makes the calls to retrieve the ISOs from the manifest, storing them on disk and recording them in the Pulp database. :param manifest: The manifest containing a list of ISOs we want to download. :type manifest: pulp_rpm.plugins.db.models.ISOManifest """ self.progress_report.total_bytes = 0 self.progress_report.num_isos = len(manifest) # For each ISO in the manifest, we need to determine a relative path where we want # it to be stored, and initialize the Unit that will represent it for iso in manifest: iso.init_unit(self.sync_conduit) iso.bytes_downloaded = 0 # Set the total bytes onto the report self.progress_report.total_bytes += iso.size self.progress_report.update_progress() # We need to build a list of DownloadRequests download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for iso in manifest] self.downloader.download(download_requests) def _download_manifest(self): """ Download the manifest file, and process it to return an ISOManifest. :return: manifest of available ISOs :rtype: pulp_rpm.plugins.db.models.ISOManifest """ manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME) # I probably should have called this manifest destination, but I couldn't help myself manifest_destiny = StringIO() manifest_request = request.DownloadRequest(manifest_url, manifest_destiny) self.downloader.download([manifest_request]) # We can inspect the report status to see if we had an error when retrieving the manifest. if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED: raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url}) manifest_destiny.seek(0) try: manifest = models.ISOManifest(manifest_destiny, self._repo_url) except ValueError: self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' + 'expected format.') self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED raise ValueError(self.progress_report.error_message) return manifest def _filter_missing_isos(self, manifest): """ Use the sync_conduit and the manifest to determine which ISOs are at the feed_url that are not in our local store, as well as which ISOs are in our local store that are not available at the feed_url. :param manifest: An ISOManifest describing the ISOs that are available at the feed_url that we are synchronizing with :type manifest: pulp_rpm.plugins.db.models.ISOManifest :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should retrieve from the feed_url. The second element of the tuple is a list of Units that are available locally already, but are not currently associated with the repository. The third element of the tuple is a list of Units that represent the ISOs that we have in our local repo that were not found in the remote repo. :rtype: tuple """ def _unit_key_str(iso): """ Return a simple string representation of the unit key of the ISO. :param iso: The ISO for which we want a unit key string representation :type iso: pulp_rpm.plugins.db.models.ISO """ return '%s-%s-%s' % (iso.name, iso.checksum, iso.size) # A list of all the ISOs we have in Pulp search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO) existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria) existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit) for unit in existing_units]) existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit)) for unit in existing_units]) # A list of units currently associated with the repository search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE]) existing_repo_units = self.sync_conduit.get_units(search_criteria) existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit) for unit in existing_repo_units]) existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit)) for unit in existing_repo_units]) # A list of the ISOs in the remote repository available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest]) available_iso_keys = set([_unit_key_str(iso) for iso in manifest]) # Content that is available locally and just needs to be associated with the repository local_available_iso_keys = set([iso for iso in available_iso_keys if iso in existing_unit_keys]) local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys] # Content that is missing locally and must be downloaded local_missing_iso_keys = list(available_iso_keys - existing_unit_keys) local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys] # Content that is missing from the remote repository that is present locally remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys) remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys] return local_missing_isos, local_available_units, remote_missing_units def _remove_units(self, units): """ Use the sync_conduit's remove_unit call for each unit in units. :param units: List of pulp.plugins.model.Units that we want to remove from the repository :type units: list """ for unit in units: self.sync_conduit.remove_unit(unit)
class DownloadStep(PluginStep, listener.DownloadEventListener): def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None, working_dir=None, plugin_type=None, description=''): """ Set the default parent and step_type for the Download step :param step_type: The id of the step this processes :type step_type: str :param downloads: A list of DownloadRequests :type downloads: list of nectar.request.DownloadRequest :param repo: The repo to be published :type repo: pulp.plugins.model.Repository :param conduit: The conduit for the repo :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: The publish configuration :type config: PluginCallConfiguration :param working_dir: The temp directory this step should use for processing :type working_dir: str :param plugin_type: The type of the plugin :type plugin_type: str :param description: The text description that will be displayed to users :type description: basestring """ super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit, config=config, working_dir=working_dir, plugin_type=plugin_type) if downloads is not None: self._downloads = downloads else: self._downloads = [] self.step_type = step_type self.repo = repo self.conduit = conduit self.config = config self.working_dir = working_dir self.plugin_type = plugin_type self.description = description def initialize(self): """ Set up the nectar downloader Originally based on the ISO sync setup """ config = self.get_config() self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True) self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED)) # The _repo_url must end in a trailing slash, because we will use # urljoin to determine the path later if self._repo_url[-1] != '/': self._repo_url = self._repo_url + '/' downloader_config = importer_config_to_nectar_config(config.flatten()) # We will pass self as the event_listener, so that we can receive the # callbacks in this class if self._repo_url.lower().startswith('file'): self.downloader = LocalFileDownloader(downloader_config, self) else: self.downloader = HTTPThreadedDownloader(downloader_config, self) @property def downloads(self): """ This lets the class be instantiated with "downloads" as a generator that gets lazily evaluated. This is helpful, because at the time of instantiation, it is probably not known what downloads will be required. :return: list of download requests (nectar.request.DownloadRequest) :rtype: list """ if not isinstance(self._downloads, list): self._downloads = list(self._downloads) return self._downloads def get_total(self): """ Get total number of items to download :returns: number of DownloadRequests :rtype: int """ return len(self.downloads) def _process_block(self): """ the main "do stuff" method. In this case, just kick off all the downloads. """ self.downloader.download(self.downloads) # from listener.DownloadEventListener def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when any individual download succeeds. Bump the successes counter and report progress. :param report: report (passed in from nectar but currently not used) :type report: pulp.plugins.model.PublishReport """ self.progress_successes += 1 self.report_progress() # from listener.DownloadEventListener def download_failed(self, report): """ This is the callback that we will get from the downloader library when any individual download fails. Bump the failure counter and report progress. :param report: report (passed in from nectar but currently not used) :type report: pulp.plugins.model.PublishReport """ self.progress_failures += 1 self.report_progress() def cancel(self): """ Cancel the current step """ super(DownloadStep, self).cancel() self.downloader.cancel()
class DownloadStep(PluginStep, listener.DownloadEventListener): def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None, working_dir=None, plugin_type=None, description=''): """ Set the default parent and step_type for the Download step :param step_type: The id of the step this processes :type step_type: str :param downloads: A list of DownloadRequests :type downloads: list of nectar.request.DownloadRequest :param repo: The repo to be published :type repo: pulp.plugins.model.Repository :param conduit: The conduit for the repo :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: The publish configuration :type config: PluginCallConfiguration :param working_dir: The temp directory this step should use for processing :type working_dir: str :param plugin_type: The type of the plugin :type plugin_type: str :param description: The text description that will be displayed to users :type description: basestring """ super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit, config=config, working_dir=working_dir, plugin_type=plugin_type) if downloads is not None: self._downloads = downloads else: self._downloads = [] self.step_type = step_type self.repo = repo self.conduit = conduit self.config = config self.working_dir = working_dir self.plugin_type = plugin_type self.description = description def initialize(self): """ Set up the nectar downloader Originally based on the ISO sync setup """ config = self.get_config() self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True) self._repo_url = encode_unicode(config.get( importer_constants.KEY_FEED)) # The _repo_url must end in a trailing slash, because we will use # urljoin to determine the path later if self._repo_url[-1] != '/': self._repo_url = self._repo_url + '/' downloader_config = importer_config_to_nectar_config(config.flatten()) # We will pass self as the event_listener, so that we can receive the # callbacks in this class if self._repo_url.lower().startswith('file'): self.downloader = LocalFileDownloader(downloader_config, self) else: self.downloader = HTTPThreadedDownloader(downloader_config, self) @property def downloads(self): """ This lets the class be instantiated with "downloads" as a generator that gets lazily evaluated. This is helpful, because at the time of instantiation, it is probably not known what downloads will be required. :return: list of download requests (nectar.request.DownloadRequest) :rtype: list """ if not isinstance(self._downloads, list): self._downloads = list(self._downloads) return self._downloads def get_total(self): """ Get total number of items to download :returns: number of DownloadRequests :rtype: int """ return len(self.downloads) def _process_block(self): """ the main "do stuff" method. In this case, just kick off all the downloads. """ self.downloader.download(self.downloads) # from listener.DownloadEventListener def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when any individual download succeeds. Bump the successes counter and report progress. :param report: report (passed in from nectar but currently not used) :type report: pulp.plugins.model.PublishReport """ self.progress_successes += 1 self.report_progress() # from listener.DownloadEventListener def download_failed(self, report): """ This is the callback that we will get from the downloader library when any individual download fails. Bump the failure counter and report progress. :param report: report (passed in from nectar but currently not used) :type report: pulp.plugins.model.PublishReport """ self.progress_failures += 1 self.report_progress() def cancel(self): """ Cancel the current step """ super(DownloadStep, self).cancel() self.downloader.cancel()
class ISOSyncRun(listener.DownloadEventListener): """ This class maintains state for a single repository sync (do not reuse it). We need to keep the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself to the downloader library and receive the callbacks when downloads are complete. """ def __init__(self, sync_conduit, config): """ Initialize an ISOSyncRun. :param sync_conduit: the sync conduit to use for this sync run. :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: plugin configuration :type config: pulp.plugins.config.PluginCallConfiguration """ self.sync_conduit = sync_conduit self.config = config self._remove_missing_units = config.get( importer_constants.KEY_UNITS_REMOVE_MISSING, default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT) self._validate_downloads = config.get( importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT) self._repo_url = encode_unicode(config.get( importer_constants.KEY_FEED)) # The _repo_url must end in a trailing slash, because we will use urljoin to determine # the path to # PULP_MANIFEST later if self._repo_url[-1] != '/': self._repo_url = self._repo_url + '/' # Cast our config parameters to the correct types and use them to build a Downloader max_speed = config.get(importer_constants.KEY_MAX_SPEED) if max_speed is not None: max_speed = float(max_speed) max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS) if max_downloads is not None: max_downloads = int(max_downloads) else: max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT ssl_validation = config.get_boolean( importer_constants.KEY_SSL_VALIDATION) ssl_validation = ssl_validation if ssl_validation is not None else \ constants.CONFIG_VALIDATE_DEFAULT downloader_config = { 'max_speed': max_speed, 'max_concurrent': max_downloads, 'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT), 'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY), 'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT), 'ssl_validation': ssl_validation, 'proxy_url': config.get(importer_constants.KEY_PROXY_HOST), 'proxy_port': config.get(importer_constants.KEY_PROXY_PORT), 'proxy_username': config.get(importer_constants.KEY_PROXY_USER), 'proxy_password': config.get(importer_constants.KEY_PROXY_PASS) } downloader_config = DownloaderConfig(**downloader_config) # We will pass self as the event_listener, so that we can receive the callbacks in this # class if self._repo_url.lower().startswith('file'): self.downloader = LocalFileDownloader(downloader_config, self) else: self.downloader = HTTPThreadedDownloader(downloader_config, self) self.progress_report = SyncProgressReport(sync_conduit) @property def download_deferred(self): """ Test the download policy to determine if downloading is deferred. :return: True if deferred. :rtype: bool """ policy = self.config.get(importer_constants.DOWNLOAD_POLICY, importer_constants.DOWNLOAD_IMMEDIATE) return policy != importer_constants.DOWNLOAD_IMMEDIATE def cancel_sync(self): """ This method will cancel a sync that is in progress. """ # We used to support sync cancellation, but the current downloader implementation does # not support it # and so for now we will just pass self.progress_report.state = self.progress_report.STATE_CANCELLED self.downloader.cancel() def download_failed(self, report): """ This is the callback that we will get from the downloader library when any individual download fails. """ # If we have a download failure during the manifest phase, we should set the report to # failed for that phase. msg = _('Failed to download %(url)s: %(error_msg)s.') msg = msg % {'url': report.url, 'error_msg': report.error_msg} _logger.error(msg) if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS: self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED self.progress_report.error_message = report.error_report elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data self.progress_report.add_failed_iso(iso, report.error_report) self.progress_report.update_progress() def download_progress(self, report): """ We will get notified from time to time about some bytes we've downloaded. We can update our progress report with this information so the client can see the progress. :param report: The report of the file we are downloading :type report: nectar.report.DownloadReport """ if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded self.progress_report.finished_bytes += additional_bytes_downloaded iso.bytes_downloaded = report.bytes_downloaded self.progress_report.update_progress() def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when it succeeds in downloading a file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add the new ISO to the database. :param report: The report of the file we downloaded :type report: nectar.report.DownloadReport """ # If we are in the isos stage, then this must be one of our ISOs. if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: # This will update our bytes downloaded self.download_progress(report) iso = report.data iso.set_storage_path(os.path.basename(report.destination)) try: if self._validate_downloads: iso.validate_iso(report.destination) iso.save() iso.import_content(report.destination) repo_controller.associate_single_unit(self.sync_conduit.repo, iso) # We can drop this ISO from the url --> ISO map self.progress_report.num_isos_finished += 1 self.progress_report.update_progress() except ValueError: self.download_failed(report) def add_catalog_entries(self, units): """ Add entries to the deferred downloading (lazy) catalog. :param units: A list of: pulp_rpm.plugins.db.models.ISO. :type units: list """ for unit in units: unit.set_storage_path(unit.name) entry = LazyCatalogEntry() entry.path = unit.storage_path entry.importer_id = str(self.sync_conduit.importer_object_id) entry.unit_id = unit.id entry.unit_type_id = unit.type_id entry.url = unit.url entry.save_revision() def perform_sync(self): """ Perform the sync operation according to the config, and return a report. The sync progress will be reported through the sync_conduit. :return: The sync report :rtype: pulp.plugins.model.SyncReport """ # Get the manifest and download the ISOs that we are missing self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS try: manifest = self._download_manifest() except (IOError, ValueError): # The IOError will happen if the file can't be retrieved at all, and the ValueError will # happen if the PULP_MANIFEST file isn't in the expected format. return self.progress_report.build_final_report() # Discover what files we need to download and what we already have filtered_isos = self._filter_missing_isos(manifest) local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos # Associate units that are already in Pulp if local_available_isos: search_dicts = [unit.unit_key for unit in local_available_isos] self.sync_conduit.associate_existing(models.ISO._content_type_id, search_dicts) # Deferred downloading (Lazy) entries. self.add_catalog_entries(local_missing_isos) self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS # Download files and add units. if self.download_deferred: for iso in local_missing_isos: iso.downloaded = False iso.save() repo_controller.associate_single_unit(self.sync_conduit.repo, iso) else: self._download_isos(local_missing_isos) # Remove unwanted iso units if self._remove_missing_units: repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos) # Report that we are finished. Note that setting the # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the # progress report has collected any errors. See the progress_report's _set_state() method # for the implementation of this logic. self.progress_report.state = self.progress_report.STATE_COMPLETE report = self.progress_report.build_final_report() return report def _download_isos(self, manifest): """ Makes the calls to retrieve the ISOs from the manifest, storing them on disk and recording them in the Pulp database. :param manifest: The manifest containing a list of ISOs we want to download. :type manifest: pulp_rpm.plugins.db.models.ISOManifest """ self.progress_report.total_bytes = 0 self.progress_report.num_isos = len(manifest) # For each ISO in the manifest, we need to determine a relative path where we want # it to be stored, and initialize the Unit that will represent it for iso in manifest: iso.bytes_downloaded = 0 # Set the total bytes onto the report self.progress_report.total_bytes += iso.size self.progress_report.update_progress() # We need to build a list of DownloadRequests download_directory = common_utils.get_working_directory() download_requests = [] for iso in manifest: iso_tmp_dir = tempfile.mkdtemp(dir=download_directory) iso_name = os.path.basename(iso.url) iso_download_path = os.path.join(iso_tmp_dir, iso_name) download_requests.append( request.DownloadRequest(iso.url, iso_download_path, iso)) self.downloader.download(download_requests) def _download_manifest(self): """ Download the manifest file, and process it to return an ISOManifest. :return: manifest of available ISOs :rtype: pulp_rpm.plugins.db.models.ISOManifest """ manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME) # I probably should have called this manifest destination, but I couldn't help myself manifest_destiny = StringIO() manifest_request = request.DownloadRequest(manifest_url, manifest_destiny) self.downloader.download([manifest_request]) # We can inspect the report status to see if we had an error when retrieving the manifest. if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED: raise IOError( _("Could not retrieve %(url)s") % {'url': manifest_url}) manifest_destiny.seek(0) try: manifest = models.ISOManifest(manifest_destiny, self._repo_url) except ValueError: self.progress_report.error_message = _( 'The PULP_MANIFEST file was not in the ' + 'expected format.') self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED raise ValueError(self.progress_report.error_message) return manifest def _filter_missing_isos(self, manifest): """ Use the sync_conduit and the manifest to determine which ISOs are at the feed_url that are not in our local store, as well as which ISOs are in our local store that are not available at the feed_url. :param manifest: An ISOManifest describing the ISOs that are available at the feed_url that we are synchronizing with :type manifest: pulp_rpm.plugins.db.models.ISOManifest :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should retrieve from the feed_url. The second element of the tuple is a list of Units that are available locally already, but are not currently associated with the repository. The third element of the tuple is a list of Units that represent the ISOs that we have in our local repo that were not found in the remote repo. :rtype: tuple """ # A list of all the ISOs we have in Pulp existing_units = models.ISO.objects() existing_units_by_key = dict([(unit.unit_key_str, unit) for unit in existing_units]) existing_units.rewind() existing_unit_keys = set( [unit.unit_key_str for unit in existing_units]) # A list of units currently associated with the repository existing_repo_units = repo_controller.find_repo_content_units( self.sync_conduit.repo, yield_content_unit=True) existing_repo_units = list(existing_repo_units) existing_repo_units_by_key = dict([(unit.unit_key_str, unit) for unit in existing_repo_units]) existing_repo_unit_keys = set( [unit.unit_key_str for unit in existing_repo_units]) # A list of the ISOs in the remote repository available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest]) available_iso_keys = set([iso.unit_key_str for iso in manifest]) # Content that is available locally and just needs to be associated with the repository local_available_iso_keys = set( [iso for iso in available_iso_keys if iso in existing_unit_keys]) local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys local_available_units = [ existing_units_by_key[k] for k in local_available_iso_keys ] # Content that is missing locally and must be downloaded local_missing_iso_keys = list(available_iso_keys - existing_unit_keys) local_missing_isos = [ available_isos_by_key[k] for k in local_missing_iso_keys ] # Content that is missing from the remote repository that is present locally remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys) remote_missing_units = [ existing_repo_units_by_key[k] for k in remote_missing_unit_keys ] return local_missing_isos, local_available_units, remote_missing_units
class ISOSyncRun(listener.DownloadEventListener): """ This class maintains state for a single repository sync (do not reuse it). We need to keep the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself to the downloader library and receive the callbacks when downloads are complete. """ def __init__(self, sync_conduit, config): self.sync_conduit = sync_conduit self._remove_missing_units = config.get(importer_constants.KEY_UNITS_REMOVE_MISSING, default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT) self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT) self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED)) # The _repo_url must end in a trailing slash, because we will use urljoin to determine the path to # PULP_MANIFEST later if self._repo_url[-1] != '/': self._repo_url = self._repo_url + '/' # Cast our config parameters to the correct types and use them to build a Downloader max_speed = config.get(importer_constants.KEY_MAX_SPEED) if max_speed is not None: max_speed = float(max_speed) max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS) if max_downloads is not None: max_downloads = int(max_downloads) else: max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION) ssl_validation = ssl_validation if ssl_validation is not None else constants.CONFIG_VALIDATE_DEFAULT downloader_config = { 'max_speed': max_speed, 'max_concurrent': max_downloads, 'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT), 'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY), 'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT), 'ssl_validation': ssl_validation, 'proxy_url': config.get(importer_constants.KEY_PROXY_HOST), 'proxy_port': config.get(importer_constants.KEY_PROXY_PORT), 'proxy_username': config.get(importer_constants.KEY_PROXY_USER), 'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)} downloader_config = DownloaderConfig(**downloader_config) # We will pass self as the event_listener, so that we can receive the callbacks in this class if self._repo_url.lower().startswith('file'): self.downloader = LocalFileDownloader(downloader_config, self) else: self.downloader = HTTPThreadedDownloader(downloader_config, self) self.progress_report = SyncProgressReport(sync_conduit) def cancel_sync(self): """ This method will cancel a sync that is in progress. """ # We used to support sync cancellation, but the current downloader implementation does not support it # and so for now we will just pass self.progress_report.state = self.progress_report.STATE_CANCELLED self.downloader.cancel() def download_failed(self, report): """ This is the callback that we will get from the downloader library when any individual download fails. """ # If we have a download failure during the manifest phase, we should set the report to # failed for that phase. if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS: self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED self.progress_report.error_message = report.error_report elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data self.progress_report.add_failed_iso(iso, report.error_report) self.progress_report.update_progress() def download_progress(self, report): """ We will get notified from time to time about some bytes we've downloaded. We can update our progress report with this information so the client can see the progress. :param report: The report of the file we are downloading :type report: nectar.report.DownloadReport """ if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded self.progress_report.finished_bytes += additional_bytes_downloaded iso.bytes_downloaded = report.bytes_downloaded self.progress_report.update_progress() def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when it succeeds in downloading a file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add the new ISO to the database. :param report: The report of the file we downloaded :type report: nectar.report.DownloadReport """ # If we are in the isos stage, then this must be one of our ISOs. if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: # This will update our bytes downloaded self.download_progress(report) iso = report.data try: if self._validate_downloads: iso.validate() iso.save_unit(self.sync_conduit) # We can drop this ISO from the url --> ISO map self.progress_report.num_isos_finished += 1 self.progress_report.update_progress() except ValueError: self.download_failed(report) def perform_sync(self): """ Perform the sync operation according to the config, and return a report. The sync progress will be reported through the sync_conduit. :return: The sync report :rtype: pulp.plugins.model.SyncReport """ # Get the manifest and download the ISOs that we are missing self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS try: manifest = self._download_manifest() except (IOError, ValueError): # The IOError will happen if the file can't be retrieved at all, and the ValueError will # happen if the PULP_MANIFEST file isn't in the expected format. return self.progress_report.build_final_report() # Go get them filez self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest) self._download_isos(local_missing_isos) if self._remove_missing_units: self._remove_units(remote_missing_isos) # Report that we are finished. Note that setting the # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the # progress report has collected any errors. See the progress_report's _set_state() method # for the implementation of this logic. self.progress_report.state = self.progress_report.STATE_COMPLETE report = self.progress_report.build_final_report() return report def _download_isos(self, manifest): """ Makes the calls to retrieve the ISOs from the manifest, storing them on disk and recording them in the Pulp database. :param manifest: The manifest containing a list of ISOs we want to download. :type manifest: list """ self.progress_report.total_bytes = 0 self.progress_report.num_isos = len(manifest) # For each ISO in the manifest, we need to determine a relative path where we want it to be stored, # and initialize the Unit that will represent it for iso in manifest: iso.init_unit(self.sync_conduit) iso.bytes_downloaded = 0 # Set the total bytes onto the report self.progress_report.total_bytes += iso.size self.progress_report.update_progress() # We need to build a list of DownloadRequests download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for iso in manifest] self.downloader.download(download_requests) def _download_manifest(self): """ Download the manifest file, and process it to return an ISOManifest. :return: manifest of available ISOs :rtype: pulp_rpm.common.models.ISOManifest """ manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME) # I probably should have called this manifest destination, but I couldn't help myself manifest_destiny = StringIO() manifest_request = request.DownloadRequest(manifest_url, manifest_destiny) self.downloader.download([manifest_request]) # We can inspect the report status to see if we had an error when retrieving the manifest. if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED: raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url}) manifest_destiny.seek(0) try: manifest = models.ISOManifest(manifest_destiny, self._repo_url) except ValueError, e: self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +\ 'expected format.') self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED raise ValueError(self.progress_report.error_message) return manifest