class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        # Whether ISOs that disappeared from the feed should be removed from the repository
        # at the end of the sync (see perform_sync() / _remove_units()).
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        # Whether each downloaded ISO should be validated (see download_succeeded()).
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # Mark the report cancelled and ask the downloader to stop; any in-flight requests are
        # handled by the downloader's own cancel semantics.
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # During the ISO phase a single failure is recorded against that ISO rather than
            # failing the whole sync.
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # report.bytes_downloaded is cumulative for the file, so compute only the delta
            # since the last callback before adding it to the run-wide counter.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Validation failure (e.g. checksum/size mismatch) is reported the same way as
                # a failed transfer.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the state to STATE_ISOS_COMPLETE will
        # automatically set the state to STATE_ISOS_FAILED if the progress report has collected
        # any errors. See the progress_report's _set_state() method for the implementation of
        # this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved at all
        :raises ValueError: if the retrieved PULP_MANIFEST is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest (download_failed() sets STATE_MANIFEST_FAILED on failure).
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we
                         should retrieve from the feed_url. The second element of the tuple is
                         a list of Units that are available locally already, but are not
                         currently associated with the repository. The third element of the
                         tuple is a list of Units that represent the ISOs that we have in our
                         local repo that were not found in the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                      for unit in existing_units])
        existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                  for unit in existing_units])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.

    NOTE(review): this file contains more than one definition of ISOSyncRun; this definition
    shadows the earlier one at import time. Confirm the duplication is intentional.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        # Kept so download_deferred can consult the download policy later.
        self.config = config
        # Whether ISOs that disappeared from the feed should be disassociated after the sync.
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        # Whether each downloaded ISO should be validated (see download_succeeded()).
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
        # Cache of units currently in the repo, filled lazily by _associate_unit().
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype:  bool
        """
        policy = self.config.get(importer_constants.DOWNLOAD_POLICY,
                                 importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # During the ISO phase a single failure is recorded against that ISO rather than
            # failing the whole sync.
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # report.bytes_downloaded is cumulative for the file, so compute only the delta
            # since the last callback before adding it to the run-wide counter.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    # The unit already exists in the database; re-fetch the saved copy and
                    # associate that instead of the duplicate.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Validation failure (e.g. checksum/size mismatch) is reported the same way as
                # a failed transfer.
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type  units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            for iso in local_available_isos:
                self._associate_unit(self.sync_conduit.repo, iso)
        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            # Deferred policy: record the units and catalog entries now; content is fetched
            # lazily later.
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    # Already saved by a previous run; use the existing document. Note that no
                    # new catalog entry is added in this case (the else-clause is skipped).
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            # Also drop the lazy catalog entries for the removed units so they can no longer
            # be fetched on demand.
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the state to STATE_ISOS_COMPLETE will
        # automatically set the state to STATE_ISOS_FAILED if the progress report has collected
        # any errors. See the progress_report's _set_state() method for the implementation of
        # this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests. Each ISO is downloaded into its own
        # temporary directory under the working directory; download_succeeded() later moves
        # the content into permanent storage.
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved at all
        :raises ValueError: if the retrieved PULP_MANIFEST is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest (download_failed() sets STATE_MANIFEST_FAILED on failure).
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' + 'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the
        same name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = list(
                repo_controller.find_repo_content_units(repo, yield_content_unit=True))
        # Units that share the incoming unit's name are replaced by the incoming unit.
        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]
        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest:          An ISOManifest describing the ISOs that are available at the
                                  feed_url that we are synchronizing with
        :type  manifest:          pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:                  A 3-tuple. The first element of the tuple is a list of ISOs
                                  that we should retrieve from the feed_url. The second element
                                  of the tuple is a list of Units that are available locally
                                  already, but are not currently associated with the
                                  repository. The third element of the tuple is a list of Units
                                  that represent the ISOs that we have in our local repo that
                                  were not found in the remote repo.
        :rtype:                   tuple
        """
        # A list of all the ISOs we have in Pulp. When downloading is deferred, no existing
        # unit counts as "available locally" (the condition below is always False), so every
        # manifest entry falls into the missing set.
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([(unit.unit_key_str, unit)
                                      for unit in existing_units
                                      if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        # The queryset cursor was consumed by the comprehension above; rewind it before the
        # second pass.
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str
                                  for unit in existing_units
                                  if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k]
                                for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: plugin configuration
        :type  config: pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
        # Cache of units currently associated with the repo; lazily filled by _associate_unit().
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY,
            importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the delta since the last callback is added to finished_bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    # The unit already exists in the database; use the existing one instead.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # validate_iso() raises ValueError on a failed validation.
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype: pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO._content_type_id.default,
                                                 search_dicts)

        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            # Deferred policy: record the units and catalog entries without fetching content.
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    # Only newly-saved units get fresh catalog entries here.
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for unit in remote_missing_isos:
                # Also drop the lazy catalog entries for each removed unit.
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if
        # the progress report has collected any errors. See the progress_report's _set_state()
        # method for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            # Each ISO gets its own temp dir so same-named files cannot collide.
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype: pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved at all
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with
        the same name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = repo_controller.find_repo_content_units(
                repo, yield_content_unit=True)
        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]
        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should
                 retrieve from the feed_url. The second element of the tuple is a list of
                 Units that are available locally already, but are not currently associated
                 with the repository. The third element of the tuple is a list of Units that
                 represent the ISOs that we have in our local repo that were not found in the
                 remote repo.
        :rtype: tuple
        """
        # A list of all the ISOs we have in Pulp
        existing_units = models.ISO.objects()
        # When downloading is deferred, no unit counts as "existing locally" — every condition
        # below requires (not download_deferred) and the content file actually on disk.
        existing_units_by_key = dict([(unit.unit_key_str, unit)
                                      for unit in existing_units
                                      if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        # rewind() resets the mongoengine cursor so it can be iterated a second time.
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str
                                  for unit in existing_units
                                  if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :param config: plugin configuration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(constants.CONFIG_REMOVE_MISSING_UNITS,
                                                default=False)
        self._repo_url = encode_unicode(config.get(constants.CONFIG_FEED_URL))
        self._validate_downloads = config.get(constants.CONFIG_VALIDATE_DOWNLOADS, default=True)

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(constants.CONFIG_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        num_threads = config.get(constants.CONFIG_NUM_THREADS)
        if num_threads is not None:
            num_threads = int(num_threads)
        else:
            num_threads = constants.DEFAULT_NUM_THREADS
        downloader_config = {
            'max_speed': max_speed,
            'num_threads': num_threads,
            'ssl_client_cert': config.get(constants.CONFIG_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(constants.CONFIG_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(constants.CONFIG_SSL_CA_CERT),
            'ssl_verify_host': 1,
            'ssl_verify_peer': 1,
            'proxy_url': config.get(constants.CONFIG_PROXY_URL),
            'proxy_port': config.get(constants.CONFIG_PROXY_PORT),
            'proxy_user': config.get(constants.CONFIG_PROXY_USER),
            'proxy_password': config.get(constants.CONFIG_PROXY_PASSWORD)}
        downloader_config = DownloaderConfig(protocol='https', **downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        self.downloader = factory.get_downloader(downloader_config, self)

        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does
        # not support it and so for now we will just pass
        pass

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file that failed to download
        :type  report: pulp.common.download.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        if self.progress_report.manifest_state == STATE_RUNNING:
            self.progress_report.manifest_state = STATE_FAILED
        elif self.progress_report.isos_state == STATE_RUNNING:
            iso = self._url_iso_map[report.url]
            self.progress_report.add_failed_iso(iso, report.error_report)
            # A failed URL will not be reported on again, so drop it from the map.
            del self._url_iso_map[report.url]
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: pulp.common.download.report.DownloadReport
        """
        if self.progress_report.isos_state == STATE_RUNNING:
            iso = self._url_iso_map[report.url]
            # Only the delta since the last callback is added to isos_finished_bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso['bytes_downloaded']
            self.progress_report.isos_finished_bytes += additional_bytes_downloaded
            iso['bytes_downloaded'] = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: pulp.common.download.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.isos_state == STATE_RUNNING:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = self._url_iso_map[report.url]
            try:
                if self._validate_downloads:
                    self._validate_download(iso)
                self.sync_conduit.save_unit(iso['unit'])
                # We can drop this ISO from the url --> ISO map
                self.progress_report.isos_finished_count += 1
                self.progress_report.update_progress()
                del self._url_iso_map[report.url]
            except ValueError:
                # _validate_download() raises ValueError on size/checksum mismatch.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report. The sync
        progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.manifest_state = STATE_RUNNING
        self.progress_report.update_progress()
        manifest = self._download_manifest()
        self.progress_report.manifest_state = STATE_COMPLETE

        # Go get them filez
        self.progress_report.isos_state = STATE_RUNNING
        self.progress_report.update_progress()
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished
        self.progress_report.isos_state = STATE_COMPLETE
        self.progress_report.update_progress()
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download. It is a
                         list of dictionaries with at least the following keys: name, checksum,
                         size, and url.
        :type  manifest: list
        """
        self.progress_report.isos_total_bytes = 0
        self.progress_report.isos_total_count = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want it to
        # be stored, and initialize the Unit that will represent it
        for iso in manifest:
            unit_key = {'name': iso['name'], 'size': iso['size'], 'checksum': iso['checksum']}
            metadata = {}
            relative_path = os.path.join(unit_key['name'], unit_key['checksum'],
                                         str(unit_key['size']), unit_key['name'])
            unit = self.sync_conduit.init_unit(ids.TYPE_ID_ISO, unit_key, metadata,
                                               relative_path)
            iso['destination'] = unit.storage_path
            iso['unit'] = unit
            iso['bytes_downloaded'] = 0
            # Set the total bytes onto the report
            self.progress_report.isos_total_bytes += iso['size']
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso['url'], iso['destination'])
                             for iso in manifest]
        # Let's build an index from URL to the manifest unit dictionary, so that we can access
        # data like the name, checksum, and size as we process completed downloads
        self._url_iso_map = dict([(iso['url'], iso) for iso in manifest])
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return a list of the available Units. The
        available units will be a list of dictionaries that describe the available ISOs, with
        these keys: name, checksum, size, and url.

        :return: list of available ISOs
        :rtype:  list

        :raises IOError: if the manifest could not be retrieved
        """
        manifest_url = urljoin(self._repo_url, constants.ISO_MANIFEST_FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.manifest_state == STATE_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})
        # Now let's process the manifest and return a list of resources that we'd like to
        # download
        manifest_destiny.seek(0)
        manifest_csv = csv.reader(manifest_destiny)
        manifest = []
        for unit in manifest_csv:
            # Each manifest row is expected to be: name, checksum, size.
            name, checksum, size = unit
            resource = {'name': name, 'checksum': checksum, 'size': int(size),
                        'url': urljoin(self._repo_url, name)}
            manifest.append(resource)
        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url that
        are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url. Return a 2-tuple with this information. The first element of
        the tuple will be a subset of the given manifest that represents the missing ISOs. The
        second element will be a list of units that represent the ISOs we have in our local
        store that weren't found at the feed_url. The manifest is a list of dictionaries that
        must contain at a minimum the following keys: name, checksum, size.

        :param manifest: A list of dictionaries that describe the ISOs that are available at
                         the feed_url that we are synchronizing with
        :type  manifest: list
        :return: A 2-tuple. The first element of the tuple is a list of dictionaries that
                 describe the ISOs that we should retrieve from the feed_url. These
                 dictionaries are in the same format as they were in the manifest. The second
                 element of the tuple is a list of units that represent the ISOs that we have
                 in our local repo that were not found in the remote repo.
        :rtype:  tuple
        """
        def _unit_key_str(unit_key_dict):
            # Flatten a unit key dict into a single hashable string for set arithmetic.
            return '%s-%s-%s' % (unit_key_dict['name'], unit_key_dict['checksum'],
                                 unit_key_dict['size'])

        module_criteria = UnitAssociationCriteria(type_ids=[ids.TYPE_ID_ISO])
        existing_units = self.sync_conduit.get_units(criteria=module_criteria)

        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        existing_units_by_key = dict([(_unit_key_str(unit.unit_key), unit)
                                      for unit in existing_units])

        existing_unit_keys = set([_unit_key_str(unit.unit_key) for unit in existing_units])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]
        remote_missing_unit_keys = list(existing_unit_keys - available_iso_keys)
        remote_missing_units = [existing_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)

    def _validate_download(self, iso):
        """
        Validate the size and the checksum of the given downloaded iso. iso should be a
        dictionary with at least these keys: name, checksum, size, and destination.

        :param iso: A dictionary describing the ISO file we want to validate
        :type  iso: dict

        :raises ValueError: if the file size or sha256 checksum does not match the manifest
        """
        with open(iso['destination']) as destination_file:
            # Validate the size, if we know what it should be
            if 'size' in iso:
                # seek to the end to find the file size with tell()
                destination_file.seek(0, 2)
                size = destination_file.tell()
                if size != iso['size']:
                    raise ValueError(_('Downloading <%(name)s> failed validation. '
                                       'The manifest specified that the file should be '
                                       '%(expected)s bytes, but '
                                       'the downloaded file is %(found)s bytes.') % {
                                           'name': iso['name'], 'expected': iso['size'],
                                           'found': size})

            # Validate the checksum, if we know what it should be
            if 'checksum' in iso:
                destination_file.seek(0)
                hasher = hashlib.sha256()
                # Hash the file in chunks so large ISOs are not read into memory at once.
                bits = destination_file.read(VALIDATION_CHUNK_SIZE)
                while bits:
                    hasher.update(bits)
                    bits = destination_file.read(VALIDATION_CHUNK_SIZE)
                # Verify that, son!
                if hasher.hexdigest() != iso['checksum']:
                    raise ValueError(
                        _('Downloading <%(name)s> failed checksum validation. The manifest '
                          'specified the checksum to be %(c)s, but it was %(f)s.') % {
                            'name': iso['name'], 'c': iso['checksum'],
                            'f': hasher.hexdigest()})
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: plugin configuration
        :type  config: pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        Cancel a sync that is in progress: mark the progress report as cancelled and stop the
        downloader.
        """
        # Note: an earlier revision of this method was a no-op (the downloader at the time had
        # no cancellation support); the current downloader does support cancel().
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the delta since the last callback is added to finished_bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # validate() raises ValueError when size/checksum validation fails.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report. The sync
        progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype: pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if
        # the progress report has collected any errors. See the progress_report's _set_state()
        # method for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: list
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want it
        # to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype: pulp_rpm.common.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved at all
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            # The bound exception was unused; catch without binding (also keeps the syntax
            # forward-compatible with Python 3, unlike "except ValueError, e").
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the '
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # Mark the report as cancelled and ask the downloader to stop; the stale note claiming
        # cancellation was unsupported ("we will just pass") contradicted this code and has been
        # removed.
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file we failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the delta since the last callback is added to finished_bytes, so the
            # per-ISO counter must be kept in sync with the report.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Validation failed; record the ISO as failed rather than letting the error
                # propagate into the downloader.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the state to STATE_COMPLETE will
        # automatically set the state to STATE_ISOS_FAILED if the progress report has collected
        # any errors. See the progress_report's _set_state() method for the implementation of
        # this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved from the feed
        :raises ValueError: if the retrieved manifest was not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we
                         should retrieve from the feed_url. The second element of the tuple is
                         a list of Units that are available locally already, but are not
                         currently associated with the repository. The third element of the
                         tuple is a list of Units that represent the ISOs that we have in our
                         local repo that were not found in the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                      for unit in existing_units])
        existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                  for unit in existing_units])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository.
        # Direct set intersection replaces the previous redundant list-comprehension-into-set.
        local_available_iso_keys = \
            (available_iso_keys & existing_unit_keys) - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        # NOTE(review): in the source as found, the ``return`` keyword was severed from its
        # tuple, which would make this method return None while perform_sync unpacks a 3-tuple;
        # rejoined here into a single statement.
        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)