def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
    """
    Initializes a Step that downloads all the download requests provided.

    :param lazy_status_conduit: Conduit used to update the task status.
    :type lazy_status_conduit: LazyStatusConduit
    :param download_requests: List of download requests to process.
    :type download_requests: list of nectar.request.DownloadRequest
    """
    super(LazyUnitDownloadStep, self).__init__(
        step_type=step_type,
        status_conduit=lazy_status_conduit,
    )
    self.description = step_description
    self.download_requests = download_requests
    # Concurrency comes from the server's 'lazy' config section; the special
    # header marks these requests as originating from Pulp itself.
    concurrency = int(pulp_conf.get('lazy', 'download_concurrency'))
    self.download_config = {
        MAX_CONCURRENT: concurrency,
        HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
        SSL_VALIDATION: True
    }
    nectar_config = DownloaderConfig(**self.download_config)
    self.downloader = HTTPThreadedDownloader(nectar_config, self)
def __init__(self, sync_conduit, config):
    """
    Initialize an ISOSyncRun.

    Stores the conduit and config, normalizes the feed URL, and builds a
    Nectar downloader (local-file or threaded HTTP) configured from the
    importer settings.

    :param sync_conduit: the sync conduit to use for this sync run.
    :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config:       plugin configuration
    :type  config:       pulp.plugins.config.PluginCallConfiguration
    """
    self.sync_conduit = sync_conduit
    self.config = config
    self._remove_missing_units = config.get(
        importer_constants.KEY_UNITS_REMOVE_MISSING,
        default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
    self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                          default=constants.CONFIG_VALIDATE_DEFAULT)
    # NOTE(review): assumes the feed is a non-empty string; an absent or empty
    # feed would fail on encode_unicode/indexing below — confirm upstream validation.
    self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
    # The _repo_url must end in a trailing slash, because we will use urljoin
    # to determine the path to PULP_MANIFEST later.
    if self._repo_url[-1] != '/':
        self._repo_url = self._repo_url + '/'

    # Cast our config parameters to the correct types and use them to build a Downloader.
    max_speed = config.get(importer_constants.KEY_MAX_SPEED)
    if max_speed is not None:
        max_speed = float(max_speed)
    max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
    if max_downloads is not None:
        max_downloads = int(max_downloads)
    else:
        max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
    # get_boolean may return None when the key is unset; fall back to the default.
    ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
    ssl_validation = ssl_validation if ssl_validation is not None else \
        constants.CONFIG_VALIDATE_DEFAULT
    downloader_config = {
        'max_speed': max_speed,
        'max_concurrent': max_downloads,
        'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
        'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
        'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
        'ssl_validation': ssl_validation,
        'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
        'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
        'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
        'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
        'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
        'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
        'working_dir': common_utils.get_working_directory()}
    downloader_config = DownloaderConfig(**downloader_config)

    # We will pass self as the event_listener, so that we can receive the
    # callbacks in this class.
    if self._repo_url.lower().startswith('file'):
        self.downloader = LocalFileDownloader(downloader_config, self)
    else:
        self.downloader = HTTPThreadedDownloader(downloader_config, self)
    self.progress_report = SyncProgressReport(sync_conduit)

    self.repo_units = []
def __init__(self, name, download_config, registry_url, working_dir):
    """
    Initialize the V2Repository.

    :param name: name of a docker repository
    :type name: basestring
    :param download_config: download configuration object
    :type download_config: nectar.config.DownloaderConfig
    :param registry_url: URL for the docker registry
    :type registry_url: basestring
    :param working_dir: full path to the directory where files should be saved
    :type working_dir: basestring
    """
    # Docker's registry aligns non-namespaced images to the library namespace.
    # If we have a docker registry image, and no namespace, add the library
    # namespace to the image name.
    # Bug fix: the dots in the hostname are now escaped; previously `.` matched
    # any character, so unrelated hosts (e.g. "registryXdockerYio") matched too.
    if '/' not in name and re.search(r'registry[-,\w]*\.docker\.io',
                                     registry_url, re.IGNORECASE):
        self.name = "library/" + name
    else:
        self.name = name

    self.download_config = download_config
    self.registry_url = registry_url
    self.downloader = HTTPThreadedDownloader(self.download_config,
                                             AggregatingEventListener())
    self.working_dir = working_dir
    self.token = None
def __init__(self, sync_conduit, config):
    """
    Initialize an ISOSyncRun.

    Normalizes the feed URL and builds a Nectar downloader (local-file or
    threaded HTTP) from the importer configuration.

    :param sync_conduit: the sync conduit to use for this sync run.
    :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config:       plugin configuration
    :type  config:       pulp.plugins.config.PluginCallConfiguration
    """
    self.sync_conduit = sync_conduit
    self._remove_missing_units = config.get(
        importer_constants.KEY_UNITS_REMOVE_MISSING,
        default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
    self._validate_downloads = config.get(
        importer_constants.KEY_VALIDATE,
        default=constants.CONFIG_VALIDATE_DEFAULT)
    # NOTE(review): assumes the feed is a non-empty string — a missing feed
    # would fail on encode_unicode/indexing below; confirm upstream validation.
    self._repo_url = encode_unicode(config.get(
        importer_constants.KEY_FEED))
    # The _repo_url must end in a trailing slash, because we will use urljoin
    # to determine the path to PULP_MANIFEST later.
    if self._repo_url[-1] != '/':
        self._repo_url = self._repo_url + '/'

    # Cast our config parameters to the correct types and use them to build a Downloader.
    max_speed = config.get(importer_constants.KEY_MAX_SPEED)
    if max_speed is not None:
        max_speed = float(max_speed)
    max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
    if max_downloads is not None:
        max_downloads = int(max_downloads)
    else:
        max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
    # get_boolean may return None when the key is unset; fall back to the default.
    ssl_validation = config.get_boolean(
        importer_constants.KEY_SSL_VALIDATION)
    ssl_validation = ssl_validation if ssl_validation is not None else \
        constants.CONFIG_VALIDATE_DEFAULT
    downloader_config = {
        'max_speed': max_speed,
        'max_concurrent': max_downloads,
        'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
        'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
        'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
        'ssl_validation': ssl_validation,
        'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
        'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
        'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
        'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
    }
    downloader_config = DownloaderConfig(**downloader_config)

    # We will pass self as the event_listener, so that we can receive the
    # callbacks in this class.
    if self._repo_url.lower().startswith('file'):
        self.downloader = LocalFileDownloader(downloader_config, self)
    else:
        self.downloader = HTTPThreadedDownloader(downloader_config, self)
    self.progress_report = SyncProgressReport(sync_conduit)
def test_download_cancelled_in_failed(self, mock_started, mock_cancel):
    """Cancelling mid-run leaves every primary-source download marked failed."""
    def build_request(index):
        # Each request gets a unique unit key and a destination under self.downloaded.
        key = {
            'name': 'unit_%d' % index,
            'version': '1.0.%d' % index,
            'release': '1',
            'checksum': str(uuid4())
        }
        return Request(TYPE_ID, key, 'http://unit-city/unit_%d' % index,
                       os.path.join(self.downloaded, 'unit_%d' % index))

    request_list = [build_request(i) for i in range(5)]
    downloader = HTTPThreadedDownloader(DownloaderConfig())
    container = ContentContainer(path=self.tmp_dir)
    container.refresh = Mock()
    event = CancelEvent(2)

    report = container.download(event, downloader, request_list)

    self.assertTrue(mock_started.called)
    self.assertTrue(mock_cancel.called)
    self.assertEqual(report.total_passes, 1)
    self.assertEqual(report.total_sources, 2)
    self.assertEqual(len(report.downloads), 1)
    self.assertEqual(report.downloads[PRIMARY_ID].total_succeeded, 0)
    self.assertEqual(report.downloads[PRIMARY_ID].total_failed, 5)
def __init__(self, name, download_config, registry_url, working_dir):
    """
    Initialize the V2Repository.

    :param name: name of a docker repository
    :type name: basestring
    :param download_config: download configuration object
    :type download_config: nectar.config.DownloaderConfig
    :param registry_url: URL for the docker registry
    :type registry_url: basestring
    :param working_dir: full path to the directory where files should be saved
    :type working_dir: basestring
    """
    # Docker's registry aligns non-namespaced images to the library namespace.
    # If we have a docker registry image, and no namespace, add the library
    # namespace to the image name.
    # Bug fix: the dots in the hostname are now escaped; previously `.` matched
    # any character, so unrelated hosts (e.g. "registryXdockerYio") matched too.
    if '/' not in name and re.search(r'registry[-,\w]*\.docker\.io',
                                     registry_url, re.IGNORECASE):
        self.name = "library/" + name
    else:
        self.name = name

    self.download_config = download_config
    self.registry_url = registry_url
    self.downloader = HTTPThreadedDownloader(self.download_config,
                                             AggregatingEventListener())
    self.working_dir = working_dir
    self.token = None
def __init__(self, name, download_config, registry_url, working_dir):
    """
    :param name: name of a docker repository
    :type name: basestring
    :param download_config: download configuration object
    :type download_config: nectar.config.DownloaderConfig
    :param registry_url: URL for the docker registry
    :type registry_url: basestring
    :param working_dir: full path to the directory where files should be saved
    :type working_dir: basestring
    """
    # All downloads share one aggregating listener so results can be inspected
    # after a batch completes.
    listener = AggregatingEventListener()
    self.name = name
    self.download_config = download_config
    self.registry_url = registry_url
    self.listener = listener
    self.downloader = HTTPThreadedDownloader(download_config, listener)
    self.working_dir = working_dir
    # Auth token and alternate endpoint are discovered lazily from responses.
    self.token = None
    self.endpoint = None
def initialize(self):
    """
    Set up the nectar downloader.

    Originally based on the ISO sync setup.
    """
    config = self.get_config()
    self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                          default=True)

    feed_url = encode_unicode(config.get(importer_constants.KEY_FEED))
    # urljoin is used on this URL later, so a trailing slash is required.
    if feed_url[-1] != '/':
        feed_url += '/'
    self._repo_url = feed_url

    nectar_config = importer_config_to_nectar_config(config.flatten())
    # Pass self as the event listener so the download callbacks land in this class.
    if feed_url.lower().startswith('file'):
        self.downloader = LocalFileDownloader(nectar_config, self)
    else:
        self.downloader = HTTPThreadedDownloader(nectar_config, self)
def initialize(self):
    """
    Set up the nectar downloader.

    Originally based on the ISO sync setup.
    """
    config = self.get_config()
    self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                          default=True)

    feed_url = encode_unicode(config.get(importer_constants.KEY_FEED))
    # urljoin is used on this URL later, so a trailing slash is required.
    if feed_url[-1] != '/':
        feed_url += '/'
    self._repo_url = feed_url

    nectar_config = importer_config_to_nectar_config(config.flatten())
    # Pass self as the event listener so the download callbacks land in this class.
    if feed_url.lower().startswith('file'):
        self.downloader = LocalFileDownloader(nectar_config, self)
    else:
        self.downloader = HTTPThreadedDownloader(nectar_config, self)
def __init__(self, name, download_config, registry_url, working_dir):
    """
    Initialize the V2Repository.

    :param name: name of a docker repository
    :type name: basestring
    :param download_config: download configuration object
    :type download_config: nectar.config.DownloaderConfig
    :param registry_url: URL for the docker registry
    :type registry_url: basestring
    :param working_dir: full path to the directory where files should be saved
    :type working_dir: basestring
    """
    self.name = name
    self.download_config = download_config
    self.registry_url = registry_url
    # Collect download results with an aggregating listener.
    self.downloader = HTTPThreadedDownloader(download_config,
                                             AggregatingEventListener())
    self.working_dir = working_dir
def build_downloader(url, nectar_config):
    """
    Return a Nectar downloader for a URL with the given nectar config.

    :param url: The URL is used to determine the scheme so the correct type of
                downloader can be created.
    :type url: basestring
    :param nectar_config: The configuration that should be used with the downloader
    :type nectar_config: nectar.config.DownloaderConfig
    :return: A configured downloader.
    :rtype: nectar.downloaders.base.Downloader
    :raise ValueError: When the URL scheme is not supported.
    """
    parsed = urlparse(url)
    scheme = parsed.scheme.lower()
    # Dispatch on the (case-insensitive) scheme; anything else is unsupported.
    if scheme == 'file':
        return LocalFileDownloader(nectar_config)
    if scheme in ('http', 'https'):
        return HTTPThreadedDownloader(nectar_config)
    raise ValueError(_('Scheme "{s}" not supported').format(s=parsed.scheme))
def get_downloader(config, url, **options):
    """
    Get a configured downloader.

    :param config: A plugin configuration.
    :type config: pulp.plugins.config.PluginCallConfiguration
    :param url: A URL.
    :type url: str
    :param options: Extended configuration.
    :type options: dict
    :return: A configured downloader.
    :rtype: nectar.downloaders.base.Downloader
    :raise ValueError: when the URL scheme is not supported.
    """
    parsed = urlparse(url)
    # The nectar config is derived from the flattened plugin configuration.
    nectar_config = importer_config_to_nectar_config(config.flatten())
    scheme = parsed.scheme.lower()
    if scheme in ('http', 'https'):
        return HTTPThreadedDownloader(nectar_config)
    elif scheme == 'file':
        return LocalFileDownloader(nectar_config)
    raise ValueError(_('Scheme "{s}" not supported').format(s=parsed.scheme))
def test_download_fail_completely(self):
    """Every request fails when neither catalog nor primary content is reachable."""
    _dir, cataloged = self.populate_catalog(UNIT_WORLD, 0, 10)
    shutil.rmtree(_dir)
    _dir = self.populate_content(PRIMARY, 0, 20)

    # primary
    request_list = [
        Request(TYPE_ID,
                {
                    'name': 'unit_%d' % n,
                    'version': '1.0.%d' % n,
                    'release': '1',
                    'checksum': str(uuid4())
                },
                'http://redhat.com/%s/unit_%d' % (_dir, n),
                os.path.join(self.downloaded, 'unit_%d' % n))
        for n in range(10)
    ]

    downloader = HTTPThreadedDownloader(DownloaderConfig())
    listener = MockListener()
    container = ContentContainer(path=self.tmp_dir)
    container.refresh = Mock()
    event = Event()

    report = container.download(event, downloader, request_list, listener)

    # primary: every request should have failed with exactly one error
    for request in request_list:
        self.assertFalse(request.downloaded)
        self.assertEqual(len(request.errors), 1)

    self.assertEqual(listener.download_started.call_count, len(request_list))
    self.assertEqual(listener.download_succeeded.call_count, 0)
    self.assertEqual(listener.download_failed.call_count, len(request_list))
    self.assertEqual(report.total_passes, 1)
    self.assertEqual(report.total_sources, 2)
    self.assertEqual(len(report.downloads), 1)
    self.assertEqual(report.downloads[PRIMARY_ID].total_succeeded, 0)
    self.assertEqual(report.downloads[PRIMARY_ID].total_failed, 10)
def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
    """
    Initializes a Step that downloads all the download requests provided.

    :param lazy_status_conduit: Conduit used to update the task status.
    :type lazy_status_conduit: LazyStatusConduit
    :param download_requests: List of download requests to process.
    :type download_requests: list of nectar.request.DownloadRequest
    """
    super(LazyUnitDownloadStep, self).__init__(
        step_type=step_type,
        status_conduit=lazy_status_conduit,
    )
    self.description = step_description
    self.download_requests = download_requests
    # Concurrency comes from the server's 'lazy' config section; the special
    # header marks these requests as originating from Pulp itself.
    concurrency = int(pulp_conf.get('lazy', 'download_concurrency'))
    self.download_config = {
        MAX_CONCURRENT: concurrency,
        HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
        SSL_VALIDATION: True
    }
    nectar_config = DownloaderConfig(**self.download_config)
    self.downloader = HTTPThreadedDownloader(nectar_config, self)
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is
    configured to download from the Pulp Streamer components.

    This class is both the Step and the Nectar event listener: download
    progress is tracked in the listener callbacks rather than in the usual
    ``process_main`` loop.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config:   The keyword args used to initialize the Nectar
                             downloader configuration.
    :type download_config:   dict
    :ivar downloader:        The Nectar downloader used to fetch the requests.
    :type downloader:        nectar.downloaders.threaded.HTTPThreadedDownloader
    """

    def __init__(self, step_type, step_description, lazy_status_conduit,
                 download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param lazy_status_conduit: Conduit used to update the task status.
        :type lazy_status_conduit: LazyStatusConduit
        :param download_requests: List of download requests to process.
        :type download_requests: list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        # Concurrency comes from the server's 'lazy' configuration section;
        # the header marks these requests as pulp-internal streamer requests.
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config), self
        )

    def _process_block(self, item=None):
        """
        This block is called by the `process` loop. This is overridden because
        success and failure is determined during the EventListener callbacks,
        which will handle updating the progress. Since `item` is not used, this
        does not make use of `process_main` and simply calls the downloader.

        Inherited from Step.

        :param item: Unused.
        :type item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        The total number of download requests so progress reporting occurs at
        the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype: int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # Remove the deferred entry now that the download has started.
        query_set = DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID],
            unit_type_id=report.data[TYPE_ID]
        )
        query_set.delete()

        try:
            # If the file exists and the checksum is valid, don't download it.
            # Note: SkipLocation is raised from inside this try block after the
            # success bookkeeping; it propagates because it is not in the
            # except clause's tuple below.
            path_entry = report.data[UNIT_FILES][report.destination]
            catalog_entry = path_entry[CATALOG_ENTRY]
            self.validate_file(
                catalog_entry.path,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )
            path_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            msg = _('{path} has already been downloaded.').format(
                path=path_entry[CATALOG_ENTRY].path)
            _logger.debug(msg)
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # It's either missing or incorrect, so download it
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type report: nectar.report.DownloadReport
        """
        # Reload the content unit
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id', '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(
                report.destination,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )

            relative_path = os.path.relpath(
                catalog_entry.path,
                FileStorage.get_path(content_unit)
            )
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point
                # to the file itself.
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination, location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError), e:
            # Python 2 except syntax; the download is counted as failed and
            # the per-path flag cleared so the unit is not marked downloaded.
            _logger.debug(_('Download of {path} failed: {reason}.').format(
                path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
        self.report_progress()

        # Mark the entire unit as downloaded, if necessary (only when every
        # file belonging to the unit has been fetched and verified).
        download_flags = [entry[PATH_DOWNLOADED] for entry in
                          report.data[UNIT_FILES].values()]
        if all(download_flags):
            _logger.debug(_('Marking content unit {type}:{id} as downloaded.').format(
                type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
class V2Repository(object):
    """
    This class represents a Docker v2 repository.

    It wraps two Nectar downloaders: ``downloader`` (credentials stripped,
    used with bearer tokens) and ``auth_downloader`` (keeps the configured
    basic-auth credentials, used to fetch tokens and for basic-auth retries).
    """
    # Registry path templates; `name` is the (possibly library-prefixed)
    # repository name.
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name: name of a docker repository
        :type name: basestring
        :param download_config: download configuration object
        :type download_config: nectar.config.DownloaderConfig
        :param registry_url: URL for the docker registry
        :type registry_url: basestring
        :param working_dir: full path to the directory where files should be saved
        :type working_dir: basestring
        """
        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.
        # NOTE(review): the dots in this pattern are unescaped, so it also
        # matches hosts like "registryXdockerYio" — consider escaping them.
        if '/' not in name and re.search(r'registry[-,\w]*.docker.io',
                                         registry_url, re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url

        # Use basic auth information for retrieving tokens from auth server
        # and for downloading with basic auth.
        self.auth_downloader = HTTPThreadedDownloader(
            copy.deepcopy(self.download_config), AggregatingEventListener())
        # Strip credentials from the regular downloader so they are only sent
        # where they are actually needed.
        self.download_config.basic_auth_username = None
        self.download_config.basic_auth_password = None
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the
        registry supports API v2.

        :return: True if the v2 API is found, else False
        :rtype: bool
        """
        _logger.debug(
            'Determining if the registry URL can do v2 of the Docker API.')

        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(
                _('The docker registry is using API version: %(v)s') %
                {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that
            # this is a valid Docker 2.0 API server so that simple file-based webservers can
            # serve as our remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the
        download requests here and let them get processed elsewhere.

        :param digest: digest of the docker blob you wish to download
        :type digest: basestring

        :return: a download request instance
        :rtype: nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        # Blobs are saved under working_dir keyed by their digest.
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference, headers=True, tag=True):
        """
        Get the manifest(s) and digest(s) for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you
                          wish to retrieve.
        :type reference: basestring
        :param headers: True if headers with accepted media type should be
                        sent in the request
        :type headers: bool
        :param tag: True if the manifest should be retrieved by tag
        :type tag: bool

        :return: a list of 3-tuples (manifest, digest, content_type); see the
                 comment before the return statement for the possible shapes.
        :rtype: list of tuple
        """
        manifests = []
        request_headers = {}
        content_type_header = 'content-type'
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        # we need to skip the check of returned mediatype in case we pull
        # the manifest by digest
        if headers:
            # set the headers for first request
            request_headers['Accept'] = ','.join(
                (constants.MEDIATYPE_MANIFEST_S2,
                 constants.MEDIATYPE_MANIFEST_LIST,
                 constants.MEDIATYPE_MANIFEST_S1,
                 constants.MEDIATYPE_SIGNED_MANIFEST_S1))
        response_headers, manifest = self._get_path(path, headers=request_headers)

        # we need to disable here the digest check because of wrong digests registry returns
        # https://github.com/docker/distribution/pull/2310
        # we will just calculate it without comparing it to the value that registry has in the
        # docker-content-digest response header
        digest = models.UnitMixin.calculate_digest(manifest)
        # add manifest and digest
        manifests.append(
            (manifest, digest, response_headers.get(content_type_header)))

        # since in accept headers we have man_list and schema2 mediatype, registry would return
        # whether man list, schema2 or schema1.
        # if it is schema1 we do not need to make any other requests
        # if it is manifest list, we do not need to make any other requests, the converted type
        # for older clients will be requested later during the manifest list process time
        # if it is schema2 we need to ask schema1 for older clients.
        if tag and response_headers.get(
                content_type_header) == constants.MEDIATYPE_MANIFEST_S2:
            request_headers['Accept'] = ','.join(
                (constants.MEDIATYPE_MANIFEST_S1,
                 constants.MEDIATYPE_SIGNED_MANIFEST_S1))
            try:
                # for compatibility with older clients, try to fetch schema1 in case it is
                # available
                response_headers, manifest = self._get_path(
                    path, headers=request_headers)
                digest = self._digest_check(response_headers, manifest)
                # add manifest and digest
                manifests.append((manifest, digest,
                                  response_headers.get(content_type_header)))
            except IOError as e:
                # A missing schema1 manifest is tolerated; anything else is
                # re-raised.
                if str(e) != 'Not Found':
                    raise
                pass

        # returned list will be whether:
        # [(S2, digest, content_type), (S1, digest, content_type)]
        # or
        # [(list, digest, content_type)]
        # or
        # [(S1, digest, content_type)]
        # [(S2, digest, content_type)]
        # note the tuple has a new entry content_type which we need later to process
        # returned manifest mediatypes
        return manifests

    def _digest_check(self, headers, manifest):
        """
        Calculate the manifest digest, validating it against the
        docker-content-digest response header when that header is present.

        :param headers: response headers from the manifest request
        :type headers: dict
        :param manifest: raw manifest body
        :type manifest: basestring
        :return: the calculated digest
        :rtype: basestring
        :raises IOError: if the calculated digest does not match the header
        """
        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(
                manifest, expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _(
                    'The Manifest digest does not match the expected value. The remote '
                    'feed announced a digest of {e}, but the downloaded digest was {d}.'
                )
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.calculate_digest(manifest)
        return digest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype: list
        :raises pulp_exceptions.PulpCodedException: if the tag list cannot be
                retrieved from the registry
        """
        path = self.TAGS_PATH.format(name=self.name)
        _logger.debug('retrieving tags from remote registry')
        try:
            headers, tags = self._get_path(path)
        except IOError as e:
            raise pulp_exceptions.PulpCodedException(
                error_code=error_codes.DKR1007, repo=self.name,
                registry=self.registry_url, reason=str(e))
        # A registry may return "tags": null; normalize that to an empty list.
        return json.loads(tags)['tags'] or []

    def _get_path(self, path, headers=None):
        """
        Retrieve a single path within the upstream registry, and return a
        2-tuple of the headers and the response body.

        :param path: a full http path to retrieve that will be urljoin'd to
                     the upstream registry url.
        :type path: basestring
        :param headers: headers sent in the request
        :type headers: dict

        :return: (headers, response body)
        :rtype: tuple
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())
        request.headers = headers

        # Reuse a previously-acquired bearer token if we have one.
        if self.token:
            request.headers = auth_util.update_token_auth_header(
                request.headers, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, check report header, if basic auth is expected
        # retry with basic auth, otherwise attempt to get a token and try again
        if report.state == report.DOWNLOAD_FAILED:
            if report.error_report.get(
                    'response_code') == httplib.UNAUTHORIZED:
                auth_header = report.headers.get('www-authenticate')
                if auth_header is None:
                    raise IOError("401 responses are expected to "
                                  "contain authentication information")
                elif "Basic" in auth_header:
                    _logger.debug(
                        _('Download unauthorized, retrying with basic authentication'))
                    # auth_downloader still carries the basic-auth credentials.
                    report = self.auth_downloader.download_one(request)
                else:
                    _logger.debug(
                        _('Download unauthorized, attempting to retrieve a token.'))
                    self.token = auth_util.request_token(
                        self.auth_downloader, request, auth_header, self.name)
                    request.headers = auth_util.update_token_auth_header(
                        request.headers, self.token)
                    report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            # this condition was added in case the registry would not allow to access v2 endpoint
            # but still token would be valid for other endpoints.
            # see https://pulp.plan.io/issues/2643
            if path == '/v2/' and report.error_report.get(
                    'response_code') == httplib.UNAUTHORIZED:
                pass
            else:
                self._raise_path_error(report)

        return report.headers, report.destination.getvalue()

    @staticmethod
    def _raise_path_error(report):
        """
        Raise an exception with an appropriate error message. Specifically
        because docker hub responds with a 401 for repositories that don't
        exist, pulp cannot disambiguate Unauthorized vs. Not Found. This
        function tries to make an error message that is clear on that point.

        :param report: download report
        :type report: nectar.report.DownloadReport

        :raises IOError: always, with an appropriate message based on the report
        """
        if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            # docker hub returns 401 for repos that don't exist, so we cannot disambiguate.
            raise IOError(_('Unauthorized or Not Found'))
        else:
            raise IOError(report.error_msg)
class V1Repository(object):
    """
    This class represents a Docker v1 repository.
    """
    ANCESTRY_PATH = '/v1/images/%s/ancestry'
    DOCKER_TOKEN_HEADER = 'x-docker-token'
    DOCKER_ENDPOINT_HEADER = 'x-docker-endpoints'
    IMAGES_PATH = '/v1/repositories/%s/images'
    TAGS_PATH = '/v1/repositories/%s/tags'
    API_VERSION_CHECK_PATH = '/v1/_ping'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V1Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.listener = AggregatingEventListener()
        self.downloader = HTTPThreadedDownloader(self.download_config, self.listener)
        self.working_dir = working_dir
        # token and endpoint may be populated by _parse_response_headers() when the
        # remote registry (e.g. docker hub) hands out an auth token / endpoint host
        self.token = None
        self.endpoint = None

    def _get_single_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return its body after
        deserializing it as json.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream
                     registry url.
        :type  path: basestring

        :return:     whatever gets deserialized out of the response body's json
        :raises IOError: if the download fails
        """
        # if talking to docker hub, we'll get an endpoint specified, and then we'll have to get
        # tags from that endpoint instead of talking to the original feed URL.
        if self.endpoint:
            # we assume the same scheme that the registry URL used
            registry_url_parts = urlparse.urlsplit(self.registry_url)
            parts = urlparse.SplitResult(scheme=registry_url_parts.scheme,
                                         netloc=self.endpoint,
                                         path=path, query=None, fragment=None)
            url = urlparse.urlunsplit(parts)
        else:
            url = urlparse.urljoin(self.registry_url, path)

        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        # endpoints require auth
        if self.endpoint:
            self.add_auth_header(request)

        report = self.downloader.download_one(request)
        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())

    def _parse_response_headers(self, headers):
        """
        Some responses can include header information that we need later. This grabs
        those values and stores them for later use.

        :param headers: dictionary-like object where keys are HTTP header names and values
                        are their values.
        :type  headers: dict
        """
        # this is used for authorization on an endpoint
        if self.DOCKER_TOKEN_HEADER in headers:
            self.token = headers[self.DOCKER_TOKEN_HEADER]
        # this tells us what host to use when accessing image files
        if self.DOCKER_ENDPOINT_HEADER in headers:
            self.endpoint = headers[self.DOCKER_ENDPOINT_HEADER]

    def api_version_check(self):
        """
        Make a call to the registry URL's /v1/_ping API call to determine if the registry
        supports API v1.

        :return: True if the v1 API is found, else False
        :rtype:  bool
        """
        _logger.debug(
            'Determining if the registry URL can do v1 of the Docker API.')
        try:
            self._get_single_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        return True

    def add_auth_header(self, request):
        """
        Given a download request, add an Authorization header if we have an auth token
        available.

        :param request: a download request
        :type  request: nectar.request.DownloadRequest
        """
        if self.token:
            if request.headers is None:
                request.headers = {}
            # this emulates what docker itself does
            request.headers['Authorization'] = 'Token %s' % self.token

    def get_image_ids(self):
        """
        Get a list of all images in the upstream repository. This is conceptually a little
        ambiguous, as there can be images in a repo that are neither tagged nor in the
        ancestry for a tagged image.

        :return: list of image IDs in the repo
        :rtype:  list

        :raises pulp_exceptions.PulpCodedException: if fetching the IDs fails
        """
        path = self.IMAGES_PATH % self.name

        _logger.debug('retrieving image ids from remote registry')
        try:
            raw_data = self._get_single_path(path)
        except IOError as e:
            _logger.debug(traceback.format_exc())
            raise pulp_exceptions.PulpCodedException(
                error_code=error_codes.DKR1007,
                repo=self.name,
                registry=self.registry_url,
                reason=str(e))

        return [item['id'] for item in raw_data]

    def get_image_url(self):
        """
        Get a URL for the registry or the endpoint, for use in retrieving image files. The
        "endpoint" is a host name that might be returned in a header when retrieving
        repository data above.

        :return: a url that is either the provided registry url, or if an endpoint is known,
                 that same url with the host replaced by the endpoint
        :rtype:  basestring
        """
        if self.endpoint:
            parts = list(urlparse.urlsplit(self.registry_url))
            parts[1] = self.endpoint
            return urlparse.urlunsplit(parts)
        else:
            return self.registry_url

    def get_tags(self):
        """
        Get a dictionary of tags from the upstream repo.

        :return: a dictionary where keys are tag names, and values are either full image IDs
                 or abbreviated image IDs.
        :rtype:  dict
        """
        repo_name = self.name
        # this is a quirk of the docker registry API.
        if '/' not in repo_name:
            repo_name = 'library/' + repo_name
        path = self.TAGS_PATH % repo_name

        _logger.debug('retrieving tags from remote registry')
        raw_data = self._get_single_path(path)
        # raw_data will sometimes be a list of dicts, and sometimes just a dict,
        # depending on what version of the API we're talking to.
        if isinstance(raw_data, list):
            return dict((tag['name'], tag['layer']) for tag in raw_data)
        return raw_data

    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each in a
        directory whose name is the image ID.

        :param image_ids:   list of image IDs for which the ancestry file should be retrieved
        :type  image_ids:   list

        :raises IOError:    if a download fails
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                # makedirs (rather than mkdir) creates any missing intermediate
                # directories too, matching Repository.get_ancestry's behavior.
                os.makedirs(os.path.dirname(destination))
            except OSError as e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)

        _logger.debug('retrieving ancestry files from remote registry')
        self.downloader.download(requests)
        if len(self.listener.failed_reports):
            raise IOError(self.listener.failed_reports[0].error_msg)
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports
        API v2.

        :return: True if the v2 API is found, else False
        :rtype:  bool
        """
        _logger.debug('Determining if the registry URL can do v2 of the Docker API.')
        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(_('The docker registry is using API version: %(v)s') % {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that
            # this is a valid Docker 2.0 API server so that simple file-based webservers can
            # serve as our remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the download
        requests here and let them get processed elsewhere.

        :param digest: digest of the docker blob you wish to download
        :type  digest: basestring

        :return:       a download request instance
        :rtype:        nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type  reference: basestring

        :return:          A 2-tuple of the digest and the manifest, both basestrings
        :rtype:           tuple

        :raises IOError:  if the digest announced by the feed does not match the download
        """
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        headers, manifest = self._get_path(path)

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(manifest,
                                                      expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _('The Manifest digest does not match the expected value. The remote '
                        'feed announced a digest of {e}, but the downloaded digest was {d}.')
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.digest(manifest)
        return digest, manifest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype:  list
        """
        path = self.TAGS_PATH.format(name=self.name)
        headers, tags = self._get_path(path)
        # Some registries report "tags": null for an empty repository; normalize
        # that to an empty list, as the sibling v2 implementation does.
        return json.loads(tags)['tags'] or []

    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the
        headers and the response body.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream
                     registry url.
        :type  path: basestring

        :return:     (headers, response body)
        :rtype:      tuple

        :raises IOError: if the download fails
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())
        report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        return report.headers, report.destination.getvalue()
class Repository(object):
    """
    This class represents a Docker repository accessed via the v1 API.
    """
    IMAGES_PATH = '/v1/repositories/%s/images'
    TAGS_PATH = '/v1/repositories/%s/tags'
    ANCESTRY_PATH = '/v1/images/%s/ancestry'

    DOCKER_TOKEN_HEADER = 'x-docker-token'
    DOCKER_ENDPOINT_HEADER = 'x-docker-endpoints'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.listener = AggregatingEventListener()
        self.downloader = HTTPThreadedDownloader(self.download_config, self.listener)
        self.working_dir = working_dir
        # token and endpoint may be populated by _parse_response_headers() when the
        # remote registry (e.g. docker hub) hands out an auth token / endpoint host
        self.token = None
        self.endpoint = None

    def _get_single_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return its body after
        deserializing it as json.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream
                     registry url.
        :type  path: basestring

        :return:     whatever gets deserialized out of the response body's json
        :raises IOError: if the download fails
        """
        # if talking to docker hub, we'll get an endpoint specified, and then we'll have to get
        # tags from that endpoint instead of talking to the original feed URL.
        if self.endpoint:
            # we assume the same scheme that the registry URL used
            registry_url_parts = urlparse.urlsplit(self.registry_url)
            parts = urlparse.SplitResult(scheme=registry_url_parts.scheme,
                                         netloc=self.endpoint,
                                         path=path, query=None, fragment=None)
            url = urlparse.urlunsplit(parts)
        else:
            url = urlparse.urljoin(self.registry_url, path)

        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        # endpoints require auth
        if self.endpoint:
            self.add_auth_header(request)

        report = self.downloader.download_one(request)
        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())

    def _parse_response_headers(self, headers):
        """
        Some responses can include header information that we need later. This grabs
        those values and stores them for later use.

        :param headers: dictionary-like object where keys are HTTP header names and values
                        are their values.
        :type  headers: dict
        """
        # this is used for authorization on an endpoint
        if self.DOCKER_TOKEN_HEADER in headers:
            self.token = headers[self.DOCKER_TOKEN_HEADER]
        # this tells us what host to use when accessing image files
        if self.DOCKER_ENDPOINT_HEADER in headers:
            self.endpoint = headers[self.DOCKER_ENDPOINT_HEADER]

    def add_auth_header(self, request):
        """
        Given a download request, add an Authorization header if we have an auth token
        available.

        get_ancestry() below relies on this helper, but it was missing from this class
        (it exists on V1Repository), which made get_ancestry() raise AttributeError at
        runtime; it is added here for correctness.

        :param request: a download request
        :type  request: nectar.request.DownloadRequest
        """
        if self.token:
            if request.headers is None:
                request.headers = {}
            # this emulates what docker itself does
            request.headers['Authorization'] = 'Token %s' % self.token

    def get_image_url(self):
        """
        Get a URL for the registry or the endpoint, for use in retrieving image files. The
        "endpoint" is a host name that might be returned in a header when retrieving
        repository data above.

        get_ancestry() below relies on this helper, but it was missing from this class
        (it exists on V1Repository), which made get_ancestry() raise AttributeError at
        runtime; it is added here for correctness.

        :return: a url that is either the provided registry url, or if an endpoint is known,
                 that same url with the host replaced by the endpoint
        :rtype:  basestring
        """
        if self.endpoint:
            parts = list(urlparse.urlsplit(self.registry_url))
            parts[1] = self.endpoint
            return urlparse.urlunsplit(parts)
        else:
            return self.registry_url

    def get_image_ids(self):
        """
        Get a list of all images in the upstream repository. This is conceptually a little
        ambiguous, as there can be images in a repo that are neither tagged nor in the
        ancestry for a tagged image.

        :return: list of image IDs in the repo
        :rtype:  list

        :raises pulp_exceptions.PulpCodedException: if fetching the IDs fails
        """
        path = self.IMAGES_PATH % self.name

        _logger.debug('retrieving image ids from remote registry')
        try:
            raw_data = self._get_single_path(path)
        except IOError as e:
            _logger.debug(traceback.format_exc())
            # include the underlying reason, consistent with V1Repository.get_image_ids
            raise pulp_exceptions.PulpCodedException(error_code=error_codes.DKR1007,
                                                     repo=self.name,
                                                     registry=self.registry_url,
                                                     reason=str(e))

        return [item['id'] for item in raw_data]

    def get_tags(self):
        """
        Get a dictionary of tags from the upstream repo.

        :return: a dictionary where keys are tag names, and values are either full image IDs
                 or abbreviated image IDs.
        :rtype:  dict
        """
        repo_name = self.name
        # this is a quirk of the docker registry API.
        if '/' not in repo_name:
            repo_name = 'library/' + repo_name
        path = self.TAGS_PATH % repo_name

        _logger.debug('retrieving tags from remote registry')
        raw_data = self._get_single_path(path)
        # raw_data will sometimes be a list of dicts, and sometimes just a dict,
        # depending on what version of the API we're talking to.
        if isinstance(raw_data, list):
            return dict((tag['name'], tag['layer']) for tag in raw_data)
        return raw_data

    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each in a
        directory whose name is the image ID.

        :param image_ids:   list of image IDs for which the ancestry file should be retrieved
        :type  image_ids:   list

        :raises IOError:    if a download fails
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                dirname = os.path.dirname(destination)
                os.makedirs(dirname)
            except OSError as e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)

        _logger.debug('retrieving ancestry files from remote registry')
        self.downloader.download(requests)
        if len(self.listener.failed_reports):
            raise IOError(self.listener.failed_reports[0].error_msg)
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        # whether units no longer present upstream should be removed locally
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        # whether downloaded ISOs should be checksum/size validated
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            # file:// feeds are read straight off the local filesystem
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # NOTE(review): an earlier comment here claimed cancellation is unsupported and that
        # this method would "just pass", but the code below does mark the progress report
        # cancelled and asks the downloader to cancel -- confirm which behavior is current.
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # report.data carries the ISO object attached when the request was built
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
            self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # the report carries a cumulative byte count; convert to a delta before
            # adding it to the aggregate finished_bytes counter
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # validation failure is treated like any other download failure
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if
        # the progress report has collected any errors. See the progress_report's _set_state()
        # method for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [
            request.DownloadRequest(iso.url, iso.storage_path, iso)
            for iso in manifest
        ]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved
        :raises ValueError: if the manifest was not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url,
                                                   manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest (download_failed() flips the state on failure).
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' +
                'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that
                         we should retrieve from the feed_url. The second element of the
                         tuple is a list of Units that are available locally already, but
                         are not currently associated with the repository. The third
                         element of the tuple is a list of Units that represent the ISOs
                         that we have in our local repo that were not found in the remote
                         repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(
            models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_units
        ])
        existing_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_units
        ])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_repo_units
        ])
        existing_repo_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_repo_units
        ])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso)
                                      for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys -
                                        available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        Reads the importer configuration, casts the numeric settings to their proper
        types, and builds the Nectar downloader that will be used for both the manifest
        and the ISO downloads.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: plugin configuration
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later.
        # NOTE(review): an empty feed URL would raise IndexError here — presumably the feed is
        # validated as non-empty upstream; confirm against the importer's config validation.
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class.  file:// feeds get the local-file downloader; everything else goes over HTTP.
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
        # Lazily-populated cache of the repo's current content units; see _associate_unit().
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY,
            importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: the report for the failed download
        :type report: nectar.report.DownloadReport
        """
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase; during the ISO phase we record the individual failure instead.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Nectar reports cumulative bytes; convert to a delta before adding to the total.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    # validate_iso() raises ValueError on a checksum/size mismatch, which the
                    # outer except routes to download_failed().
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    # Another sync saved an identical unit first; reuse the existing document.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype: pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            for iso in local_available_isos:
                self._associate_unit(self.sync_conduit.repo, iso)
            # Deferred downloading (Lazy) entries.
            self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            # Deferred policy: create the units (marked not-downloaded) and catalog entries
            # instead of fetching any bits now.
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    # Only freshly-saved units need new catalog entries.
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for unit in remote_missing_isos:
                # Also purge any lazy catalog entries that point at the removed unit.
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the state to STATE_ISOS_COMPLETE will
        # automatically set the state to STATE_ISOS_FAILED if the progress report has collected
        # any errors. See the progress_report's _set_state() method for the implementation of
        # this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests. Each ISO gets its own temporary
        # directory to avoid name collisions.
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype: pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError: if the manifest could not be retrieved
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest (download_failed() flips the state on failure).
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with
        the same name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = list(repo_controller.find_repo_content_units(
                repo, yield_content_unit=True))
        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]
        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type download_deferred: bool
        :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should
                 retrieve from the feed_url. The second element of the tuple is a list of
                 Units that are available locally already, but are not currently associated
                 with the repository. The third element of the tuple is a list of Units that
                 represent the ISOs that we have in our local repo that were not found in
                 the remote repo.
        :rtype: tuple
        """
        # A list of all the ISOs we have in Pulp.  When downloading is deferred, the
        # `not download_deferred` guard empties both collections, so everything in the
        # manifest is treated as locally missing.
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([(unit.unit_key_str, unit) for unit in existing_units
                                      if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        # Mongoengine querysets are exhausted after one iteration; rewind before reusing.
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str for unit in existing_units
                                  if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name: name of a docker repository
        :type name: basestring
        :param download_config: download configuration object
        :type download_config: nectar.config.DownloaderConfig
        :param registry_url: URL for the docker registry
        :type registry_url: basestring
        :param working_dir: full path to the directory where files should be saved
        :type working_dir: basestring
        """
        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.
        # NOTE(review): the regex dots are unescaped, so this also matches e.g.
        # "registry-docker0io" — presumably harmless for real-world URLs, but confirm.
        if '/' not in name and re.search(r'registry[-,\w]*.docker.io', registry_url,
                                         re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url
        # All metadata requests share one downloader with an aggregating listener;
        # blob downloads are expected to be driven elsewhere (see
        # create_blob_download_request).
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir
        # Bearer token acquired lazily on the first 401 response; see _get_path().
        self.token = None

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports
        API v2.

        :return: True if the v2 API is found, else False
        :rtype: bool
        """
        _logger.debug('Determining if the registry URL can do v2 of the Docker API.')
        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(_('The docker registry is using API version: %(v)s') % {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that
            # this is a valid Docker 2.0 API server so that simple file-based webservers can
            # serve as our remote feed.
            pass
        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the download requests here
        and let them get processed elsewhere.

        :param digest: digest of the docker blob you wish to download
        :type digest: basestring
        :return: a download request instance
        :rtype: nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type reference: basestring
        :return: A 2-tuple of the digest and the manifest, both basestrings
        :rtype: tuple
        :raises IOError: if the computed digest disagrees with the digest the registry announced
        """
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        headers, manifest = self._get_path(path)

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(manifest, expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _('The Manifest digest does not match the expected value. The remote '
                        'feed announced a digest of {e}, but the downloaded digest was {d}.')
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.calculate_digest(manifest)
        return digest, manifest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype: list
        """
        path = self.TAGS_PATH.format(name=self.name)
        headers, tags = self._get_path(path)
        return json.loads(tags)['tags']

    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers
        and the response body.

        If the first attempt fails with HTTP 401, a bearer token is requested and the download
        is retried once with the token attached; the token is cached for subsequent calls.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream
                     registry url.
        :type path: basestring
        :return: (headers, response body)
        :rtype: tuple
        :raises IOError: if the download fails (after the token retry, if any)
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())

        if self.token:
            token_util.add_auth_header(request, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, attempt to get a token and try again
        if report.state == report.DOWNLOAD_FAILED:
            if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
                _logger.debug(_('Download unauthorized, attempting to retrieve a token.'))
                self.token = token_util.request_token(self.downloader, request, report.headers)
                token_util.add_auth_header(request, self.token)
                report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        return report.headers, report.destination.getvalue()
class DownloadStep(PluginStep, listener.DownloadEventListener):

    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Configure a publish/sync step that drives a set of Nectar downloads.

        :param step_type: The id of the step this processes
        :type step_type: str
        :param downloads: A list of DownloadRequests
        :type downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type working_dir: str
        :param plugin_type: The type of the plugin
        :type plugin_type: str
        :param description: The text description that will be displayed to users
        :type description: basestring
        """
        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit,
                                           config=config, working_dir=working_dir,
                                           plugin_type=plugin_type)
        # Keep None out of the attribute so the `downloads` property always has a sequence.
        self._downloads = downloads if downloads is not None else []
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Build the nectar downloader this step will use.

        Originally based on the ISO sync setup.
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # urljoin needs a trailing slash on the feed URL to resolve relative paths later.
        if self._repo_url[-1] != '/':
            self._repo_url += '/'

        nectar_conf = importer_config_to_nectar_config(config.flatten())

        # Passing self as the event listener routes the download_* callbacks below
        # back into this step.  file:// feeds use the local-file downloader.
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(nectar_conf, self)
        else:
            self.downloader = HTTPThreadedDownloader(nectar_conf, self)

    @property
    def downloads(self):
        """
        The download requests to process, materialized as a list.

        "downloads" may be handed to the constructor as a generator, since the full
        set of requests is often unknown at instantiation time; it is converted to a
        list on first access and cached.

        :return: list of download requests (nectar.request.DownloadRequest)
        :rtype: list
        """
        if not isinstance(self._downloads, list):
            self._downloads = list(self._downloads)
        return self._downloads

    def get_total(self):
        """
        Get total number of items to download

        :returns: number of DownloadRequests
        :rtype: int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        The main work method: hand every request to the downloader.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        Nectar callback fired for each download that completes successfully.
        Bumps the success counter and reports progress.

        :param report: report (passed in from nectar but currently not used)
        :type report: pulp.plugins.model.PublishReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        Nectar callback fired for each download that fails.
        Bumps the failure counter and reports progress.

        :param report: report (passed in from nectar but currently not used)
        :type report: pulp.plugins.model.PublishReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step, then tell the downloader to stop.
        """
        super(DownloadStep, self).cancel()
        self.downloader.cancel()
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is
    configured to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config: The keyword args used to initialize the Nectar
                           downloader configuration.
    :type download_config: dict
    :ivar downloader: The Nectar downloader used to fetch the requests.
    :type downloader: nectar.downloaders.threaded.HTTPThreadedDownloader
    """

    def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param step_type: The id of the step this processes
        :type step_type: str
        :param step_description: The text description displayed to users
        :type step_description: basestring
        :param lazy_status_conduit: Conduit used to update the task status.
        :type lazy_status_conduit: LazyStatusConduit
        :param download_requests: List of download requests to process.
        :type download_requests: list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        # The PULP_STREAM_REQUEST_HEADER marks these requests so the Pulp Streamer
        # recognizes them as internal download traffic.
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config),
            self
        )

    def _process_block(self, item=None):
        """
        This block is called by the `process` loop. This is overridden because
        success and failure is determined during the EventListener callbacks,
        which will handle updating the progress. Since `item` is not used, this
        does not make use of `process_main` and simply calls the downloader.

        Inherited from Step.

        :param item: Unused.
        :type item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        The total number of download requests so progress reporting occurs at
        the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype: int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # Remove the deferred entry now that the download has started.
        query_set = DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID],
            unit_type_id=report.data[TYPE_ID])
        query_set.delete()

        try:
            # If the file exists and the checksum is valid, don't download it
            path_entry = report.data[UNIT_FILES][report.destination]
            catalog_entry = path_entry[CATALOG_ENTRY]
            self.validate_file(
                catalog_entry.path,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum)
            path_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            msg = _('{path} has already been downloaded.').format(
                path=path_entry[CATALOG_ENTRY].path)
            _logger.debug(msg)
            # Raised inside the try on purpose: the except below only catches the
            # checksum/IO errors, so SkipLocation propagates to Nectar.
            # NOTE(review): this assumes SkipLocation is not a subclass of IOError — confirm.
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # It's either missing or incorrect, so download it
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it
        into its final storage location if its checksum value matches the
        value in the catalog entry (if present).

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type report: nectar.report.DownloadReport
        """
        # Reload the content unit
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id', '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(
                report.destination,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum)

            relative_path = os.path.relpath(
                catalog_entry.path,
                FileStorage.get_path(content_unit))
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point to the file
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination, location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError), e:
            # Python 2 except syntax (kept as-is; file is Python 2 throughout).
            _logger.debug(_('Download of {path} failed: {reason}.').format(
                path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
            self.report_progress()

        # Mark the entire unit as downloaded, if necessary.
        download_flags = [entry[PATH_DOWNLOADED] for entry in
                          report.data[UNIT_FILES].values()]
        if all(download_flags):
            _logger.debug(
                _('Marking content unit {type}:{id} as downloaded.').format(
                    type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
def get_downloader(self, conduit, config, url): if url.startswith('http'): return HTTPThreadedDownloader(nectar_config(config)) if url.startswith('file'): return LocalFileDownloader(nectar_config(config)) raise ValueError('unsupported url')
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun and build the appropriate downloader.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: plugin configuration
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to
        # determine the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        # NOTE(review): falls back to CONFIG_VALIDATE_DEFAULT (the download-validation
        # default) rather than an SSL-specific default — confirm this is intended.
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in
        # this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation
        # does not support it and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: the report of the failed download
        :type report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can
        update our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the delta since the last callback is added to finished_bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Validation failure is treated the same as a download failure.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report. The sync
        progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype: pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the
            # ValueError will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the state to STATE_ISOS_COMPLETE
        # will automatically set the state to STATE_ISOS_FAILED if the progress report has
        # collected any errors. See the progress_report's _set_state() method for the
        # implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type manifest: list
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype: pulp_rpm.common.models.ISOManifest

        :raises IOError: if the manifest could not be retrieved at all
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError, e:
            # NOTE(review): the bound exception `e` is never used below.
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest
class DownloadStep(PluginStep, listener.DownloadEventListener):
    """
    A PluginStep that runs a batch of nectar download requests and reports
    per-file progress through the DownloadEventListener callbacks.
    """

    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Set the default parent and step_type for the Download step

        :param step_type: The id of the step this processes
        :type step_type: str
        :param downloads: A list of DownloadRequests
        :type downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type working_dir: str
        :param plugin_type: The type of the plugin
        :type plugin_type: str
        :param description: The text description that will be displayed to users
        :type description: basestring
        """
        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit,
                                           config=config, working_dir=working_dir,
                                           plugin_type=plugin_type)
        # Avoid a shared mutable default by only assigning caller-supplied lists.
        if downloads is not None:
            self._downloads = downloads
        else:
            self._downloads = []
        # NOTE(review): these attributes are also passed to the PluginStep super above;
        # re-assigning them here looks redundant — confirm against PluginStep.__init__.
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Set up the nectar downloader

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)

    @property
    def downloads(self):
        """
        This lets the class be instantiated with "downloads" as a generator that
        gets lazily evaluated. This is helpful, because at the time of
        instantiation, it is probably not known what downloads will be
        required.

        :return: list of download requests (nectar.request.DownloadRequest)
        :rtype: list
        """
        # Materialize a generator exactly once; later accesses reuse the list.
        if not isinstance(self._downloads, list):
            self._downloads = list(self._downloads)
        return self._downloads

    def get_total(self):
        """
        Get total number of items to download

        :returns: number of DownloadRequests
        :rtype: int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        the main "do stuff" method. In this case, just kick off all the
        downloads.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when any
        individual download succeeds. Bump the successes counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type report: pulp.plugins.model.PublishReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any
        individual download fails. Bump the failure counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type report: pulp.plugins.model.PublishReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step
        """
        super(DownloadStep, self).cancel()
        # Also stop any in-flight nectar downloads.
        self.downloader.cancel()
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: plugin configuration
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to
        # determine the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        # NOTE(review): falls back to CONFIG_VALIDATE_DEFAULT (the download-validation
        # default) rather than an SSL-specific default — confirm this is intended.
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in
        # this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
        # Cache of units already associated with the repo; see _associate_unit.
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY,
            importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: the report of the failed download
        :type report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can
        update our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the delta since the last callback is added to finished_bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    # The unit already exists; re-fetch it by its unit key.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Validation failure is treated the same as a download failure.
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use
            # sha256. Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report. The sync
        progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype: pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the
            # ValueError will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO._content_type_id.default,
                                                 search_dicts)
            # Deferred downloading (Lazy) entries.
            self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    # The unit already exists; re-fetch it by its unit key.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    # Only freshly-saved units get new catalog entries.
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for unit in remote_missing_isos:
                # Also drop any lazy-catalog entries for the removed units.
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the state to STATE_ISOS_COMPLETE
        # will automatically set the state to STATE_ISOS_FAILED if the progress report has
        # collected any errors. See the progress_report's _set_state() method for the
        # implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests; each ISO is downloaded into its own
        # temporary directory under the working directory.
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype: pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError: if the manifest could not be retrieved at all
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with
        the same name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            # NOTE(review): find_repo_content_units with yield_content_unit=True looks like
            # it returns a generator; caching it here and iterating it on every call would
            # exhaust it after the first pass — verify against repo_controller.
            self.repo_units = repo_controller.find_repo_content_units(
                repo, yield_content_unit=True)
        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]
        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type download_deferred: bool
        :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should
                 retrieve from the feed_url. The second element of the tuple is a list of
                 Units that are available locally already, but are not currently associated
                 with the repository. The third element of the tuple is a list of Units that
                 represent the ISOs that we have in our local repo that were not found in
                 the remote repo.
        :rtype: tuple
        """
        # A list of all the ISOs we have in Pulp
        existing_units = models.ISO.objects()
        # When downloading is deferred, no local unit counts as "available on disk".
        existing_units_by_key = dict([(unit.unit_key_str, unit)
                                      for unit in existing_units
                                      if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str
                                  for unit in existing_units
                                  if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the
        # repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k]
                                for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
def _create_and_configure_downloader(self, listener):
    """
    Build a threaded HTTP downloader wired to the given event listener.

    :param listener: event listener that will receive nectar download callbacks
    :type  listener: nectar.listener.DownloadEventListener

    :return: an HTTP downloader configured from this importer's flattened config
    :rtype:  nectar.downloaders.threaded.HTTPThreadedDownloader
    """
    # Translate the importer's plugin config into a nectar downloader config.
    nectar_conf = importer_config_to_nectar_config(self.config.flatten())
    return HTTPThreadedDownloader(nectar_conf, listener)
class Repository(object):
    """
    Client for a single named repository in a (v1) docker registry/index.
    Retrieves image IDs, tags, and ancestry files over HTTP.
    """

    # v1 registry API path templates (filled with the repo/image name).
    IMAGES_PATH = '/v1/repositories/%s/images'
    TAGS_PATH = '/v1/repositories/%s/tags'
    ANCESTRY_PATH = '/v1/images/%s/ancestry'
    # Response headers carrying the auth token and the endpoint host.
    DOCKER_TOKEN_HEADER = 'x-docker-token'
    DOCKER_ENDPOINT_HEADER = 'x-docker-endpoints'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        :param name: name of a docker repository
        :type name: basestring
        :param download_config: download configuration object
        :type download_config: nectar.config.DownloaderConfig
        :param registry_url: URL for the docker registry
        :type registry_url: basestring
        :param working_dir: full path to the directory where files should be saved
        :type working_dir: basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        # Aggregates success/failure reports across batch downloads.
        self.listener = AggregatingEventListener()
        self.downloader = HTTPThreadedDownloader(self.download_config, self.listener)
        self.working_dir = working_dir
        # Populated from response headers by _parse_response_headers.
        self.token = None
        self.endpoint = None

    def _get_single_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return its
        body after deserializing it as json

        :param path: a full http path to retrieve that will be urljoin'd to the
                     upstream registry url.
        :type path: basestring

        :return: whatever gets deserialized out of the response body's json
        """
        url = urlparse.urljoin(self.registry_url, path)
        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        # Capture token/endpoint headers for later authenticated requests.
        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())

    def _parse_response_headers(self, headers):
        """
        Some responses can include header information that we need later. This
        grabs those values and stores them for later use.

        :param headers: dictionary-like object where keys are HTTP header names
                        and values are their values.
        :type headers: dict
        """
        # this is used for authorization on an endpoint
        if self.DOCKER_TOKEN_HEADER in headers:
            self.token = headers[self.DOCKER_TOKEN_HEADER]
        # this tells us what host to use when accessing image files
        if self.DOCKER_ENDPOINT_HEADER in headers:
            self.endpoint = headers[self.DOCKER_ENDPOINT_HEADER]

    def get_image_ids(self):
        """
        Get a list of all images in the upstream repository. This is
        conceptually a little ambiguous, as there can be images in a repo that
        are neither tagged nor in the ancestry for a tagged image.

        :return: list of image IDs in the repo
        :rtype: list
        """
        path = self.IMAGES_PATH % self.name

        _logger.debug('retrieving image ids from remote registry')
        raw_data = self._get_single_path(path)
        return [item['id'] for item in raw_data]

    def get_tags(self):
        """
        Get a dictionary of tags from the upstream repo.

        :return: a dictionary where keys are tag names, and values are either
                 full image IDs or abbreviated image IDs.
        :rtype: dict
        """
        repo_name = self.name
        # this is a quirk of the docker registry API: un-namespaced repos live
        # under the implicit 'library' namespace.
        if '/' not in repo_name:
            repo_name = 'library/' + repo_name

        path = self.TAGS_PATH % repo_name

        _logger.debug('retrieving tags from remote registry')
        raw_data = self._get_single_path(path)
        # raw_data will sometimes be a list of dicts, and sometimes just a dict,
        # depending on what version of the API we're talking to.
        if isinstance(raw_data, list):
            return dict((tag['name'], tag['layer']) for tag in raw_data)
        return raw_data

    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each
        in a directory whose name is the image ID.

        :param image_ids: list of image IDs for which the ancestry file should
                          be retrieved
        :type image_ids: list

        :raises IOError: if a download fails
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                os.mkdir(os.path.split(destination)[0])
            except OSError, e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)

        _logger.debug('retrieving ancestry files from remote registry')
        self.downloader.download(requests)

        # Surface the first failure (if any) collected by the listener.
        if len(self.listener.failed_reports):
            raise IOError(self.listener.failed_reports[0].error_msg)
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later. endswith() also avoids an IndexError if the feed is
        # an empty string.
        if not self._repo_url.endswith('/'):
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        Cancel a sync that is in progress by marking the progress report cancelled and telling
        the downloader to stop.
        """
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file that failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
            self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # nectar reports cumulative bytes; track only the delta since the last callback
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # validation failure is reported the same way as a download failure
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the state to STATE_COMPLETE will
        # automatically set the state to STATE_ISOS_FAILED if the progress report has collected
        # any errors. See the progress_report's _set_state() method for the implementation of
        # this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we
                         should retrieve from the feed_url. The second element of the tuple is
                         a list of Units that are available locally already, but are not
                         currently associated with the repository. The third element of the
                         tuple is a list of Units that represent the ISOs that we have in our
                         local repo that were not found in the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                      for unit in existing_units])
        existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                  for unit in existing_units])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the
        # repository. Set intersection expresses this directly instead of filtering one
        # set against the other with a comprehension.
        local_available_iso_keys = available_iso_keys & existing_unit_keys
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should be saved
        :type  working_dir:     basestring
        """
        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name. The dots in the hostname are escaped so that
        # e.g. 'registry0docker0io' does not accidentally match.
        if '/' not in name and re.search(r'registry[-,\w]*\.docker\.io', registry_url,
                                         re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url

        # Use basic auth information only for retrieving tokens from auth server;
        # it is stripped from the config before the main downloader is built.
        self.token_downloader = HTTPThreadedDownloader(self.download_config,
                                                       AggregatingEventListener())
        self.download_config.basic_auth_username = None
        self.download_config.basic_auth_password = None
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports
        API v2.

        :return: True if the v2 API is found, else False
        :rtype:  bool
        """
        _logger.debug('Determining if the registry URL can do v2 of the Docker API.')

        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(_('The docker registry is using API version: %(v)s') % {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that
            # this is a valid Docker 2.0 API server so that simple file-based webservers can
            # serve as our remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the
        download requests here and let them get processed elsewhere.

        :param digest: digest of the docker blob you wish to download
        :type  digest: basestring

        :return: a download request instance
        :rtype:  nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type  reference: basestring

        :return: A 2-tuple of the digest and the manifest, both basestrings
        :rtype:  tuple

        :raises IOError: if the downloaded manifest's digest does not match the digest that
                         the registry announced in its response headers
        """
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        headers, manifest = self._get_path(path)

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(manifest, expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _('The Manifest digest does not match the expected value. The remote '
                        'feed announced a digest of {e}, but the downloaded digest was {d}.')
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.calculate_digest(manifest)

        return digest, manifest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype:  list

        :raises pulp_exceptions.PulpCodedException: if the tag list cannot be retrieved
        """
        path = self.TAGS_PATH.format(name=self.name)
        _logger.debug('retrieving tags from remote registry')
        try:
            headers, tags = self._get_path(path)
        except IOError as e:
            raise pulp_exceptions.PulpCodedException(error_code=error_codes.DKR1007,
                                                     repo=self.name,
                                                     registry=self.registry_url,
                                                     reason=str(e))
        return json.loads(tags)['tags']

    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the
        headers and the response body.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream
                     registry url.
        :type  path: basestring

        :return: (headers, response body)
        :rtype:  tuple

        :raises IOError: if the download fails even after re-authenticating
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())
        if self.token:
            request.headers = token_util.update_auth_header(request.headers, self.token)
        report = self.downloader.download_one(request)

        # If the download was unauthorized, attempt to get a token and try again
        if report.state == report.DOWNLOAD_FAILED:
            if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
                _logger.debug(_('Download unauthorized, attempting to retrieve a token.'))
                self.token = token_util.request_token(self.token_downloader, request,
                                                      report.headers)
                request.headers = token_util.update_auth_header(request.headers, self.token)
                report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            self._raise_path_error(report)

        return report.headers, report.destination.getvalue()

    @staticmethod
    def _raise_path_error(report):
        """
        Raise an exception with an appropriate error message.

        Specifically because docker hub responds with a 401 for repositories that don't exist,
        pulp cannot disambiguate Unauthorized vs. Not Found. This function tries to make an
        error message that is clear on that point.

        :param report: download report
        :type  report: nectar.report.DownloadReport

        :raises IOError: always, with an appropriate message based on the report
        """
        if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            # docker hub returns 401 for repos that don't exist, so we cannot disambiguate.
            raise IOError(_('Unauthorized or Not Found'))
        else:
            raise IOError(report.error_msg)