Example #1
0
    def __init__(self, step_type, step_description, lazy_status_conduit,
                 download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {
                PULP_STREAM_REQUEST_HEADER: 'true'
            },
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config), self)
Example #2
0
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        self.repo_units = []
Example #3
0
    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """

        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.

        if '/' not in name and re.search(r'registry[-,\w]*.docker.io',
                                         registry_url, re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None
Example #4
0
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert':
            config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key':
            config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
Example #5
0
 def test_download_cancelled_in_failed(self, mock_started, mock_cancel):
     request_list = []
     for n in range(0, 5):
         unit_key = {
             'name': 'unit_%d' % n,
             'version': '1.0.%d' % n,
             'release': '1',
             'checksum': str(uuid4())
         }
         request = Request(TYPE_ID, unit_key,
                           'http://unit-city/unit_%d' % n,
                           os.path.join(self.downloaded, 'unit_%d' % n))
         request_list.append(request)
     downloader = HTTPThreadedDownloader(DownloaderConfig())
     container = ContentContainer(path=self.tmp_dir)
     container.refresh = Mock()
     event = CancelEvent(2)
     report = container.download(event, downloader, request_list)
     self.assertTrue(mock_started.called)
     self.assertTrue(mock_cancel.called)
     self.assertEqual(report.total_passes, 1)
     self.assertEqual(report.total_sources, 2)
     self.assertEqual(len(report.downloads), 1)
     self.assertEqual(report.downloads[PRIMARY_ID].total_succeeded, 0)
     self.assertEqual(report.downloads[PRIMARY_ID].total_failed, 5)
Example #6
0
    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """

        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.

        if '/' not in name and re.search(r'registry[-,\w]*.docker.io', registry_url, re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url
        self.downloader = HTTPThreadedDownloader(self.download_config, AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None
Example #7
0
 def __init__(self, name, download_config, registry_url, working_dir):
     """
     :param name:            name of a docker repository
     :type  name:            basestring
     :param download_config: download configuration object
     :type  download_config: nectar.config.DownloaderConfig
     :param registry_url:    URL for the docker registry
     :type  registry_url:    basestring
     :param working_dir:     full path to the directory where files should
                             be saved
     :type  working_dir:     basestring
     """
     self.name = name
     self.download_config = download_config
     self.registry_url = registry_url
     self.listener = AggregatingEventListener()
     self.downloader = HTTPThreadedDownloader(self.download_config,
                                              self.listener)
     self.working_dir = working_dir
     self.token = None
     self.endpoint = None
Example #8
0
    def initialize(self):
        """
        Set up the nectar downloader

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
Example #9
0
    def initialize(self):
        """
        Set up the nectar downloader

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
Example #10
0
    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.downloader = HTTPThreadedDownloader(self.download_config, AggregatingEventListener())
        self.working_dir = working_dir
Example #11
0
    def build_downloader(url, nectar_config):
        """
        Return a Nectar downloader for a URL with the given nectar config.

        :param url:           The URL is used to determine the scheme so the correct type of
                              downloader can be created.
        :type  url:           basestring
        :param nectar_config: The configuration that should be used with the downloader
        :type  nectar_config: nectar.config.DownloaderConfig
        :return:              A configured downloader.
        :rtype:               nectar.downloaders.base.Downloader
        :raise ValueError:    When the URL scheme is not supported.
        """
        url = urlparse(url)
        scheme = url.scheme.lower()
        if scheme == 'file':
            return LocalFileDownloader(nectar_config)
        if scheme in ('http', 'https'):
            return HTTPThreadedDownloader(nectar_config)
        raise ValueError(_('Scheme "{s}" not supported').format(s=url.scheme))
Example #12
0
    def get_downloader(config, url, **options):
        """
        Get a configured downloader.

        :param config: A plugin configuration.
        :type config: pulp.plugins.config.PluginCallConfiguration
        :param url: A URL.
        :type url: str
        :param options: Extended configuration.
        :type options: dict
        :return: A configured downloader.
        :rtype: nectar.downloaders.base.Downloader
        :raise ValueError: when the URL scheme is not supported.
        """
        url = urlparse(url)
        nectar_config = importer_config_to_nectar_config(config.flatten())
        scheme = url.scheme.lower()
        if scheme == 'file':
            return LocalFileDownloader(nectar_config)
        if scheme in ('http', 'https'):
            return HTTPThreadedDownloader(nectar_config)
        raise ValueError(_('Scheme "{s}" not supported').format(s=url.scheme))
Example #13
0
 def test_download_fail_completely(self):
     request_list = []
     _dir, cataloged = self.populate_catalog(UNIT_WORLD, 0, 10)
     shutil.rmtree(_dir)
     _dir = self.populate_content(PRIMARY, 0, 20)
     # primary
     for n in range(0, 10):
         unit_key = {
             'name': 'unit_%d' % n,
             'version': '1.0.%d' % n,
             'release': '1',
             'checksum': str(uuid4())
         }
         request = Request(TYPE_ID, unit_key,
                           'http://redhat.com/%s/unit_%d' % (_dir, n),
                           os.path.join(self.downloaded, 'unit_%d' % n))
         request_list.append(request)
     downloader = HTTPThreadedDownloader(DownloaderConfig())
     listener = MockListener()
     container = ContentContainer(path=self.tmp_dir)
     container.refresh = Mock()
     event = Event()
     report = container.download(event, downloader, request_list, listener)
     # primary
     for i in range(0, len(request_list)):
         request = request_list[i]
         self.assertFalse(request.downloaded)
         self.assertEqual(len(request.errors), 1)
     self.assertEqual(listener.download_started.call_count,
                      len(request_list))
     self.assertEqual(listener.download_succeeded.call_count, 0)
     self.assertEqual(listener.download_failed.call_count,
                      len(request_list))
     self.assertEqual(report.total_passes, 1)
     self.assertEqual(report.total_sources, 2)
     self.assertEqual(len(report.downloads), 1)
     self.assertEqual(report.downloads[PRIMARY_ID].total_succeeded, 0)
     self.assertEqual(report.downloads[PRIMARY_ID].total_failed, 10)
Example #14
0
    def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config),
            self
        )
Example #15
0
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is configured
    to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config:   The keyword args used to initialize the Nectar
                             downloader configuration.
    :type download_config:   dict
    :ivar downloader:        The Nectar downloader used to fetch the requests.
    :type downloader:        nectar.downloaders.threaded.HTTPThreadedDownloader
    """

    def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config),
            self
        )

    def _process_block(self, item=None):
        """
        This block is called by the `process` loop. This is overridden because
        success and failure is determined during the EventListener callbacks,
        which will handle updating the progress. Since `item` is not used, this
        does not make use of `process_main` and simply calls the downloader.

        Inherited from Step.

        :param item: Unused.
        :type  item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        The total number of download requests so progress reporting occurs at
        the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype:  int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # Remove the deferred entry now that the download has started.
        query_set = DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID],
            unit_type_id=report.data[TYPE_ID]
        )
        query_set.delete()

        try:
            # If the file exists and the checksum is valid, don't download it
            path_entry = report.data[UNIT_FILES][report.destination]
            catalog_entry = path_entry[CATALOG_ENTRY]
            self.validate_file(
                catalog_entry.path,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )
            path_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            msg = _('{path} has already been downloaded.').format(
                path=path_entry[CATALOG_ENTRY].path)
            _logger.debug(msg)
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # It's either missing or incorrect, so download it
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport
        """
        # Reload the content unit
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id', '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(
                report.destination,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )

            relative_path = os.path.relpath(
                catalog_entry.path,
                FileStorage.get_path(content_unit)
            )
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point to the file
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination, location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError), e:
            _logger.debug(_('Download of {path} failed: {reason}.').format(
                path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
        self.report_progress()

        # Mark the entire unit as downloaded, if necessary.
        download_flags = [entry[PATH_DOWNLOADED] for entry in
                          report.data[UNIT_FILES].values()]
        if all(download_flags):
            _logger.debug(_('Marking content unit {type}:{id} as downloaded.').format(
                type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
Example #16
0
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """

        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.

        if '/' not in name and re.search(r'registry[-,\w]*.docker.io',
                                         registry_url, re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url

        # Use basic auth information for retrieving tokens from auth server and for downloading
        # with basic auth
        self.auth_downloader = HTTPThreadedDownloader(
            copy.deepcopy(self.download_config), AggregatingEventListener())
        self.download_config.basic_auth_username = None
        self.download_config.basic_auth_password = None
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports API
        v2.

        :return: True if the v2 API is found, else False
        :rtype:  bool
        """
        _logger.debug(
            'Determining if the registry URL can do v2 of the Docker API.')

        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(
                _('The docker registry is using API version: %(v)s') %
                {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that this
            # is a valid Docker 2.0 API server so that simple file-based webservers can serve as our
            # remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the download
        requests here and let them get processed elsewhere.

        :param digest:          digest of the docker blob you wish to download
        :type  digest:          basestring

        :return:    a download request instance
        :rtype:     nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference, headers=True, tag=True):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type  reference: basestring
        :param headers: True if headers with accepted media type should be sent in the request
        :type  headers: bool
        :param tag: True if the manifest should be retrieved by tag
        :type  tag: bool

        :return:          A 2-tuple of the digest and the manifest, both basestrings
        :rtype:           tuple
        """
        manifests = []
        request_headers = {}
        content_type_header = 'content-type'
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        # we need to skip the check of returned mediatype in case we pull
        # the manifest by digest
        if headers:
            # set the headers for first request
            request_headers['Accept'] = ','.join(
                (constants.MEDIATYPE_MANIFEST_S2,
                 constants.MEDIATYPE_MANIFEST_LIST,
                 constants.MEDIATYPE_MANIFEST_S1,
                 constants.MEDIATYPE_SIGNED_MANIFEST_S1))
        response_headers, manifest = self._get_path(path,
                                                    headers=request_headers)
        # we need to disable here the digest check because of wrong digests registry returns
        # https://github.com/docker/distribution/pull/2310
        # we will just calculate it without camparing it to the value that registry has in the
        # docker-content-digest response header
        digest = models.UnitMixin.calculate_digest(manifest)
        # add manifest and digest
        manifests.append(
            (manifest, digest, response_headers.get(content_type_header)))

        # since in accept headers we have man_list and schema2 mediatype, registry would return
        # whether man list, schema2 or schema1.
        # if it is schema1 we do not need to make any other requests
        # if it is manifest list, we do not need to make any other requests, the converted type
        # for older clients will be requested later during the manifest list process time
        # if it is schema2 we need to ask schema1 for older clients.
        if tag and response_headers.get(
                content_type_header) == constants.MEDIATYPE_MANIFEST_S2:
            request_headers['Accept'] = ','.join(
                (constants.MEDIATYPE_MANIFEST_S1,
                 constants.MEDIATYPE_SIGNED_MANIFEST_S1))
            try:
                # for compatibility with older clients, try to fetch schema1 in case it is available
                response_headers, manifest = self._get_path(
                    path, headers=request_headers)
                digest = self._digest_check(response_headers, manifest)

                # add manifest and digest
                manifests.append((manifest, digest,
                                  response_headers.get(content_type_header)))
            except IOError as e:
                if str(e) != 'Not Found':
                    raise
                pass

        # returned list will be whether:
        # [(S2, digest, content_type), (S1, digest, content_type)]
        # or
        # [(list, digest, content_type)]
        # or
        # [(S1, digest, content_type)]
        # [(S2, digest, content_type)]
        # note the tuple has a new entry content_type which we need later to process
        # returned manifest mediatypes
        return manifests

    def _digest_check(self, headers, manifest):

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(
                manifest,
                expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _(
                    'The Manifest digest does not match the expected value. The remote '
                    'feed announced a digest of {e}, but the downloaded digest was {d}.'
                )
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.calculate_digest(manifest)

        return digest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype:  list
        """
        path = self.TAGS_PATH.format(name=self.name)
        _logger.debug('retrieving tags from remote registry')
        try:
            headers, tags = self._get_path(path)
        except IOError as e:
            raise pulp_exceptions.PulpCodedException(
                error_code=error_codes.DKR1007,
                repo=self.name,
                registry=self.registry_url,
                reason=str(e))
        return json.loads(tags)['tags'] or []

    def _get_path(self, path, headers=None):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers and
        the response body.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream registry
                     url.
        :type  path: basestring
        :param headers: headers sent in the request
        :type headers:  dict

        :return:     (headers, response body)
        :rtype:      tuple
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())
        request.headers = headers

        if self.token:
            request.headers = auth_util.update_token_auth_header(
                request.headers, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, check report header, if basic auth is expected
        # retry with basic auth, otherwise attempt to get a token and try again
        if report.state == report.DOWNLOAD_FAILED:
            if report.error_report.get(
                    'response_code') == httplib.UNAUTHORIZED:
                auth_header = report.headers.get('www-authenticate')
                if auth_header is None:
                    raise IOError("401 responses are expected to "
                                  "contain authentication information")
                elif "Basic" in auth_header:
                    _logger.debug(
                        _('Download unauthorized, retrying with basic authentication'
                          ))
                    report = self.auth_downloader.download_one(request)
                else:
                    _logger.debug(
                        _('Download unauthorized, attempting to retrieve a token.'
                          ))
                    self.token = auth_util.request_token(
                        self.auth_downloader, request, auth_header, self.name)
                    request.headers = auth_util.update_token_auth_header(
                        request.headers, self.token)
                    report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            # this condition was added in case the registry would not allow to access v2 endpoint
            # but still token would be valid for other endpoints.
            # see https://pulp.plan.io/issues/2643
            if path == '/v2/' and report.error_report.get(
                    'response_code') == httplib.UNAUTHORIZED:
                pass
            else:
                self._raise_path_error(report)

        return report.headers, report.destination.getvalue()

    @staticmethod
    def _raise_path_error(report):
        """
        Raise an exception with an appropriate error message.

        Specifically because docker hub responds with a 401 for repositories that don't exist, pulp
        cannot disambiguate Unauthorized vs. Not Found. This function tries to make an error message
        that is clear on that point.

        :param report:  download report
        :type  report:  nectar.report.DownloadReport

        :raises IOError:    always, with an appropriate message based on the report
        """
        if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            # docker hub returns 401 for repos that don't exist, so we cannot disambiguate.
            raise IOError(_('Unauthorized or Not Found'))
        else:
            raise IOError(report.error_msg)
Example #17
0
class V1Repository(object):
    """
    This class represents a Docker v1 repository.
    """
    ANCESTRY_PATH = '/v1/images/%s/ancestry'
    DOCKER_TOKEN_HEADER = 'x-docker-token'
    DOCKER_ENDPOINT_HEADER = 'x-docker-endpoints'
    IMAGES_PATH = '/v1/repositories/%s/images'
    TAGS_PATH = '/v1/repositories/%s/tags'
    API_VERSION_CHECK_PATH = '/v1/_ping'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V1Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.listener = AggregatingEventListener()
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 self.listener)
        self.working_dir = working_dir
        self.token = None
        self.endpoint = None

    def _get_single_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return its
        body after deserializing it as json

        :param path:    a full http path to retrieve that will be urljoin'd to the
                        upstream registry url.
        :type  path:    basestring

        :return:    whatever gets deserialized out of the response body's json
        """
        # if talking to docker hub, we'll get an endpoint specified, and then we'll have to get
        # tags from that endpoint instead of talking to the original feed URL.
        if self.endpoint:
            # we assume the same scheme that the registry URL used
            registry_url_parts = urlparse.urlsplit(self.registry_url)
            parts = urlparse.SplitResult(scheme=registry_url_parts.scheme,
                                         netloc=self.endpoint,
                                         path=path,
                                         query=None,
                                         fragment=None)
            url = urlparse.urlunsplit(parts)
        else:
            url = urlparse.urljoin(self.registry_url, path)
        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        # endpoints require auth
        if self.endpoint:
            self.add_auth_header(request)

        report = self.downloader.download_one(request)
        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())

    def _parse_response_headers(self, headers):
        """
        Some responses can include header information that we need later. This
        grabs those values and stores them for later use.

        :param headers: dictionary-like object where keys are HTTP header names
                        and values are their values.
        :type  headers: dict
        """
        # this is used for authorization on an endpoint
        if self.DOCKER_TOKEN_HEADER in headers:
            self.token = headers[self.DOCKER_TOKEN_HEADER]
        # this tells us what host to use when accessing image files
        if self.DOCKER_ENDPOINT_HEADER in headers:
            self.endpoint = headers[self.DOCKER_ENDPOINT_HEADER]

    def api_version_check(self):
        """
        Make a call to the registry URL's /v1/_ping API call to determine if the registry supports
        API v1.

        :return: True if the v1 API is found, else False
        :rtype:  bool
        """
        _logger.debug(
            'Determining if the registry URL can do v1 of the Docker API.')

        try:
            self._get_single_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        return True

    def add_auth_header(self, request):
        """
        Given a download request, add an Authorization header if we have an
        auth token available.

        :param request: a download request
        :type  request: nectar.request.DownloadRequest
        """
        if self.token:
            if request.headers is None:
                request.headers = {}
            # this emulates what docker itself does
            request.headers['Authorization'] = 'Token %s' % self.token

    def get_image_ids(self):
        """
        Get a list of all images in the upstream repository. This is
        conceptually a little ambiguous, as there can be images in a repo that
        are neither tagged nor in the ancestry for a tagged image.

        :return:    list of image IDs in the repo
        :rtype:     list

        :raises pulp_exceptions.PulpCodedException: if fetching the IDs fails
        """
        path = self.IMAGES_PATH % self.name

        _logger.debug('retrieving image ids from remote registry')
        try:
            raw_data = self._get_single_path(path)
        except IOError as e:
            _logger.debug(traceback.format_exc())
            raise pulp_exceptions.PulpCodedException(
                error_code=error_codes.DKR1007,
                repo=self.name,
                registry=self.registry_url,
                reason=str(e))

        return [item['id'] for item in raw_data]

    def get_image_url(self):
        """
        Get a URL for the registry or the endpoint, for use in retrieving image
        files. The "endpoint" is a host name that might be returned in a header
        when retrieving repository data above.

        :return:    a url that is either the provided registry url, or if an
                    endpoint is known, that same url with the host replaced by
                    the endpoint
        :rtype:     basestring
        """
        if self.endpoint:
            parts = list(urlparse.urlsplit(self.registry_url))
            parts[1] = self.endpoint
            return urlparse.urlunsplit(parts)
        else:
            return self.registry_url

    def get_tags(self):
        """
        Get a dictionary of tags from the upstream repo.

        :return:    a dictionary where keys are tag names, and values are either
                    full image IDs or abbreviated image IDs.
        :rtype:     dict
        """
        repo_name = self.name
        # this is a quirk of the docker registry API.
        if '/' not in repo_name:
            repo_name = 'library/' + repo_name

        path = self.TAGS_PATH % repo_name

        _logger.debug('retrieving tags from remote registry')
        raw_data = self._get_single_path(path)
        # raw_data will sometimes be a list of dicts, and sometimes just a dict,
        # depending on what version of the API we're talking to.
        if isinstance(raw_data, list):
            return dict((tag['name'], tag['layer']) for tag in raw_data)
        return raw_data

    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each
        in a directory whose name is the image ID.

        :param image_ids:   list of image IDs for which the ancestry file
                            should be retrieved
        :type  image_ids:   list

        :raises IOError:    if a download fails
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                os.mkdir(os.path.split(destination)[0])
            except OSError, e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)

        _logger.debug('retrieving ancestry files from remote registry')
        self.downloader.download(requests)
        if len(self.listener.failed_reports):
            raise IOError(self.listener.failed_reports[0].error_msg)
Example #18
0
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.downloader = HTTPThreadedDownloader(self.download_config, AggregatingEventListener())
        self.working_dir = working_dir

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports API
        v2.

        :return: True if the v2 API is found, else False
        :rtype:  bool
        """
        _logger.debug('Determining if the registry URL can do v2 of the Docker API.')

        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(_('The docker registry is using API version: %(v)s') % {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that this
            # is a valid Docker 2.0 API server so that simple file-based webservers can serve as our
            # remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the download
        requests here and let them get processed elsewhere.

        :param digest:          digest of the docker blob you wish to download
        :type  digest:          basestring

        :return:    a download request instance
        :rtype:     nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type  reference: basestring
        :return:          A 2-tuple of the digest and the manifest, both basestrings
        :rtype:           tuple
        """
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        headers, manifest = self._get_path(path)

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(manifest, expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _('The Manifest digest does not match the expected value. The remote '
                        'feed announced a digest of {e}, but the downloaded digest was {d}.')
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.digest(manifest)
        return digest, manifest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype:  list
        """
        path = self.TAGS_PATH.format(name=self.name)
        headers, tags = self._get_path(path)
        return json.loads(tags)['tags']

    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers and
        the response body.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream registry
                     url.
        :type  path: basestring

        :return:     (headers, response body)
        :rtype:      tuple
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())
        report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        return report.headers, report.destination.getvalue()
Example #19
0
class Repository(object):
    IMAGES_PATH = '/v1/repositories/%s/images'
    TAGS_PATH = '/v1/repositories/%s/tags'
    ANCESTRY_PATH = '/v1/images/%s/ancestry'

    DOCKER_TOKEN_HEADER = 'x-docker-token'
    DOCKER_ENDPOINT_HEADER = 'x-docker-endpoints'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.listener = AggregatingEventListener()
        self.downloader = HTTPThreadedDownloader(self.download_config, self.listener)
        self.working_dir = working_dir
        self.token = None
        self.endpoint = None

    def _get_single_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return its
        body after deserializing it as json

        :param path:    a full http path to retrieve that will be urljoin'd to the
                        upstream registry url.
        :type  path:    basestring

        :return:    whatever gets deserialized out of the response body's json
        """
        # if talking to docker hub, we'll get an endpoint specified, and then we'll have to get
        # tags from that endpoint instead of talking to the original feed URL.
        if self.endpoint:
            # we assume the same scheme that the registry URL used
            registry_url_parts = urlparse.urlsplit(self.registry_url)
            parts = urlparse.SplitResult(scheme=registry_url_parts.scheme, netloc=self.endpoint,
                                         path=path, query=None, fragment=None)
            url = urlparse.urlunsplit(parts)
        else:
            url = urlparse.urljoin(self.registry_url, path)
        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        # endpoints require auth
        if self.endpoint:
            self.add_auth_header(request)
        report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())

    def _parse_response_headers(self, headers):
        """
        Some responses can include header information that we need later. This
        grabs those values and stores them for later use.

        :param headers: dictionary-like object where keys are HTTP header names
                        and values are their values.
        :type  headers: dict
        """
        # this is used for authorization on an endpoint
        if self.DOCKER_TOKEN_HEADER in headers:
            self.token = headers[self.DOCKER_TOKEN_HEADER]
        # this tells us what host to use when accessing image files
        if self.DOCKER_ENDPOINT_HEADER in headers:
            self.endpoint = headers[self.DOCKER_ENDPOINT_HEADER]

    def get_image_ids(self):
        """
        Get a list of all images in the upstream repository. This is
        conceptually a little ambiguous, as there can be images in a repo that
        are neither tagged nor in the ancestry for a tagged image.

        :return:    list of image IDs in the repo
        :rtype:     list

        :raises pulp_exceptions.PulpCodedException: if fetching the IDs fails
        """
        path = self.IMAGES_PATH % self.name

        _logger.debug('retrieving image ids from remote registry')
        try:
            raw_data = self._get_single_path(path)
        except IOError:
            _logger.debug(traceback.format_exc())
            raise pulp_exceptions.PulpCodedException(error_code=error_codes.DKR1007,
                                                     repo=self.name,
                                                     registry=self.registry_url)

        return [item['id'] for item in raw_data]

    def get_tags(self):
        """
        Get a dictionary of tags from the upstream repo.

        :return:    a dictionary where keys are tag names, and values are either
                    full image IDs or abbreviated image IDs.
        :rtype:     dict
        """
        repo_name = self.name
        # this is a quirk of the docker registry API.
        if '/' not in repo_name:
            repo_name = 'library/' + repo_name

        path = self.TAGS_PATH % repo_name

        _logger.debug('retrieving tags from remote registry')
        raw_data = self._get_single_path(path)
        # raw_data will sometimes be a list of dicts, and sometimes just a dict,
        # depending on what version of the API we're talking to.
        if isinstance(raw_data, list):
            return dict((tag['name'], tag['layer']) for tag in raw_data)
        return raw_data

    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each
        in a directory whose name is the image ID.

        :param image_ids:   list of image IDs for which the ancestry file
                            should be retrieved
        :type  image_ids:   list

        :raises IOError:    if a download fails
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                dirname = os.path.dirname(destination)
                os.makedirs(dirname)
            except OSError, e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)

        _logger.debug('retrieving ancestry files from remote registry')
        self.downloader.download(requests)
        if len(self.listener.failed_reports):
            raise IOError(self.listener.failed_reports[0].error_msg)
Example #20
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert':
            config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key':
            config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does
        # not support it
        # and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [
            request.DownloadRequest(iso.url, iso.storage_path, iso)
            for iso in manifest
        ]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url,
                                                   manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' + 'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(
            models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_units
        ])
        existing_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_units
        ])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_repo_units
        ])
        existing_repo_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_repo_units
        ])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso)
                                      for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys -
                                        available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
Example #21
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """
    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed':
            max_speed,
            'max_concurrent':
            max_downloads,
            'ssl_client_cert':
            config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key':
            config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert':
            config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation':
            ssl_validation,
            'proxy_url':
            config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port':
            config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username':
            config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password':
            config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username':
            config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password':
            config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir':
            common_utils.get_working_directory()
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(importer_constants.DOWNLOAD_POLICY,
                                 importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest,
                                                  self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            for iso in local_available_isos:
                self._associate_unit(self.sync_conduit.repo, iso)

        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo,
                                               remote_missing_isos)
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(importer_id=str(
                    self.sync_conduit.importer_object_id),
                                                     unit_id=unit.id,
                                                     unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(
                request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url,
                                                   manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' + 'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the same
        name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = list(
                repo_controller.find_repo_content_units(
                    repo, yield_content_unit=True))

        units_to_remove = [
            iso for iso in self.repo_units if iso['name'] == unit['name']
        ]

        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        # A list of all the ISOs we have in Pulp
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([
            (unit.unit_key_str, unit) for unit in existing_units
            if not download_deferred and os.path.isfile(unit.storage_path)
        ])
        existing_units.rewind()
        existing_unit_keys = set([
            unit.unit_key_str for unit in existing_units
            if not download_deferred and os.path.isfile(unit.storage_path)
        ])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set(
            [unit.unit_key_str for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso)
                                      for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys -
                                        available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units
Example #22
0
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """

        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.

        if '/' not in name and re.search(r'registry[-,\w]*.docker.io',
                                         registry_url, re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports API
        v2.

        :return: True if the v2 API is found, else False
        :rtype:  bool
        """
        _logger.debug(
            'Determining if the registry URL can do v2 of the Docker API.')

        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(
                _('The docker registry is using API version: %(v)s') %
                {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that this
            # is a valid Docker 2.0 API server so that simple file-based webservers can serve as our
            # remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the download
        requests here and let them get processed elsewhere.

        :param digest:          digest of the docker blob you wish to download
        :type  digest:          basestring

        :return:    a download request instance
        :rtype:     nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type  reference: basestring
        :return:          A 2-tuple of the digest and the manifest, both basestrings
        :rtype:           tuple
        """
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        headers, manifest = self._get_path(path)

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(
                manifest,
                expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _(
                    'The Manifest digest does not match the expected value. The remote '
                    'feed announced a digest of {e}, but the downloaded digest was {d}.'
                )
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.calculate_digest(manifest)
        return digest, manifest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype:  list
        """
        path = self.TAGS_PATH.format(name=self.name)
        headers, tags = self._get_path(path)
        return json.loads(tags)['tags']

    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers and
        the response body.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream registry
                     url.
        :type  path: basestring

        :return:     (headers, response body)
        :rtype:      tuple
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())

        if self.token:
            token_util.add_auth_header(request, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, attempt to get a token and try again
        if report.state == report.DOWNLOAD_FAILED:
            if report.error_report.get(
                    'response_code') == httplib.UNAUTHORIZED:
                _logger.debug(
                    _('Download unauthorized, attempting to retrieve a token.')
                )
                self.token = token_util.request_token(self.downloader, request,
                                                      report.headers)
                token_util.add_auth_header(request, self.token)
                report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        return report.headers, report.destination.getvalue()
Example #23
0
class DownloadStep(PluginStep, listener.DownloadEventListener):
    def __init__(self,
                 step_type,
                 downloads=None,
                 repo=None,
                 conduit=None,
                 config=None,
                 working_dir=None,
                 plugin_type=None,
                 description=''):
        """
        Set the default parent and step_type for the Download step

        :param step_type: The id of the step this processes
        :type  step_type: str
        :param downloads: A list of DownloadRequests
        :type  downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type  repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type  config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type  working_dir: str
        :param plugin_type: The type of the plugin
        :type  plugin_type: str
        :param description: The text description that will be displayed to users
        :type  description: basestring
        """

        super(DownloadStep, self).__init__(step_type,
                                           repo=repo,
                                           conduit=conduit,
                                           config=config,
                                           working_dir=working_dir,
                                           plugin_type=plugin_type)
        if downloads is not None:
            self._downloads = downloads
        else:
            self._downloads = []
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Set up the nectar downloader

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=True)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)

    @property
    def downloads(self):
        """
        This lets the class be instantiated with "downloads" as a generator that
        gets lazily evaluated. This is helpful, because at the time of
        instantiation, it is probably not known what downloads will be
        required.

        :return:    list of download requests (nectar.request.DownloadRequest)
        :rtype:     list
        """
        if not isinstance(self._downloads, list):
            self._downloads = list(self._downloads)
        return self._downloads

    def get_total(self):
        """
        Get total number of items to download

        :returns: number of DownloadRequests
        :rtype: int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        the main "do stuff" method. In this case, just kick off all the
        downloads.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download succeeds. Bump the successes counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails. Bump the failure counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step
        """
        super(DownloadStep, self).cancel()
        self.downloader.cancel()
Example #24
0
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is configured
    to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config:   The keyword args used to initialize the Nectar
                             downloader configuration.
    :type download_config:   dict
    :ivar downloader:        The Nectar downloader used to fetch the requests.
    :type downloader:        nectar.downloaders.threaded.HTTPThreadedDownloader
    """
    def __init__(self, step_type, step_description, lazy_status_conduit,
                 download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {
                PULP_STREAM_REQUEST_HEADER: 'true'
            },
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config), self)

    def _process_block(self, item=None):
        """
        This block is called by the `process` loop. This is overridden because
        success and failure is determined during the EventListener callbacks,
        which will handle updating the progress. Since `item` is not used, this
        does not make use of `process_main` and simply calls the downloader.

        Inherited from Step.

        :param item: Unused.
        :type  item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        The total number of download requests so progress reporting occurs at
        the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype:  int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # Remove the deferred entry now that the download has started.
        query_set = DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID], unit_type_id=report.data[TYPE_ID])
        query_set.delete()

        try:
            # If the file exists and the checksum is valid, don't download it
            path_entry = report.data[UNIT_FILES][report.destination]
            catalog_entry = path_entry[CATALOG_ENTRY]
            self.validate_file(catalog_entry.path,
                               catalog_entry.checksum_algorithm,
                               catalog_entry.checksum)
            path_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            msg = _('{path} has already been downloaded.').format(
                path=path_entry[CATALOG_ENTRY].path)
            _logger.debug(msg)
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # It's either missing or incorrect, so download it
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport
        """
        # Reload the content unit
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id',
                                    '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(report.destination,
                               catalog_entry.checksum_algorithm,
                               catalog_entry.checksum)

            relative_path = os.path.relpath(catalog_entry.path,
                                            FileStorage.get_path(content_unit))
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point to the file
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(
                    set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination,
                                            location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError), e:
            _logger.debug(
                _('Download of {path} failed: {reason}.').format(
                    path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
        self.report_progress()

        # Mark the entire unit as downloaded, if necessary.
        download_flags = [
            entry[PATH_DOWNLOADED]
            for entry in report.data[UNIT_FILES].values()
        ]
        if all(download_flags):
            _logger.debug(
                _('Marking content unit {type}:{id} as downloaded.').format(
                    type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
Example #25
0
 def get_downloader(self, conduit, config, url):
     if url.startswith('http'):
         return HTTPThreadedDownloader(nectar_config(config))
     if url.startswith('file'):
         return LocalFileDownloader(nectar_config(config))
     raise ValueError('unsupported url')
Example #26
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep the state so
    that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself
    to the downloader library and receive the callbacks when downloads are complete.
    """
    def __init__(self, sync_conduit, config):
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(importer_constants.KEY_UNITS_REMOVE_MISSING,
                                                default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does not support it
        # and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in downloading a
        file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add
        the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and recording them in the
        Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: list
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want it to be stored,
        # and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.common.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError, e:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +\
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest
Example #27
0
class DownloadStep(PluginStep, listener.DownloadEventListener):

    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Set the default parent and step_type for the Download step

        :param step_type: The id of the step this processes
        :type  step_type: str
        :param downloads: A list of DownloadRequests
        :type  downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type  repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type  config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type  working_dir: str
        :param plugin_type: The type of the plugin
        :type  plugin_type: str
        :param description: The text description that will be displayed to users
        :type  description: basestring
        """

        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit,
                                           config=config, working_dir=working_dir,
                                           plugin_type=plugin_type)
        if downloads is not None:
            self._downloads = downloads
        else:
            self._downloads = []
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Set up the nectar downloader

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)

    @property
    def downloads(self):
        """
        This lets the class be instantiated with "downloads" as a generator that
        gets lazily evaluated. This is helpful, because at the time of
        instantiation, it is probably not known what downloads will be
        required.

        :return:    list of download requests (nectar.request.DownloadRequest)
        :rtype:     list
        """
        if not isinstance(self._downloads, list):
            self._downloads = list(self._downloads)
        return self._downloads

    def get_total(self):
        """
        Get total number of items to download

        :returns: number of DownloadRequests
        :rtype: int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        the main "do stuff" method. In this case, just kick off all the
        downloads.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download succeeds. Bump the successes counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails. Bump the failure counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step
        """
        super(DownloadStep, self).cancel()
        self.downloader.cancel()
Example #28
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY,
            importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO._content_type_id.default, search_dicts)

        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the same
        name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times
            self.repo_units = repo_controller.find_repo_content_units(repo, yield_content_unit=True)

        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]

        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """
        # A list of all the ISOs we have in Pulp
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([(unit.unit_key_str, unit)
                                      for unit in existing_units if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str
                                  for unit in existing_units if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
Example #29
0
 def _create_and_configure_downloader(self, listener):
     config = importer_config_to_nectar_config(self.config.flatten())
     return HTTPThreadedDownloader(config, listener)
Example #30
0
class Repository(object):
    IMAGES_PATH = '/v1/repositories/%s/images'
    TAGS_PATH = '/v1/repositories/%s/tags'
    ANCESTRY_PATH = '/v1/images/%s/ancestry'

    DOCKER_TOKEN_HEADER = 'x-docker-token'
    DOCKER_ENDPOINT_HEADER = 'x-docker-endpoints'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """
        self.name = name
        self.download_config = download_config
        self.registry_url = registry_url
        self.listener = AggregatingEventListener()
        self.downloader = HTTPThreadedDownloader(self.download_config,
                                                 self.listener)
        self.working_dir = working_dir
        self.token = None
        self.endpoint = None

    def _get_single_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return its
        body after deserializing it as json

        :param path:    a full http path to retrieve that will be urljoin'd to the
                        upstream registry url.
        :type  path:    basestring

        :return:    whatever gets deserialized out of the response body's json
        """
        url = urlparse.urljoin(self.registry_url, path)
        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())

    def _parse_response_headers(self, headers):
        """
        Some responses can include header information that we need later. This
        grabs those values and stores them for later use.

        :param headers: dictionary-like object where keys are HTTP header names
                        and values are their values.
        :type  headers: dict
        """
        # this is used for authorization on an endpoint
        if self.DOCKER_TOKEN_HEADER in headers:
            self.token = headers[self.DOCKER_TOKEN_HEADER]
        # this tells us what host to use when accessing image files
        if self.DOCKER_ENDPOINT_HEADER in headers:
            self.endpoint = headers[self.DOCKER_ENDPOINT_HEADER]

    def get_image_ids(self):
        """
        Get a list of all images in the upstream repository. This is
        conceptually a little ambiguous, as there can be images in a repo that
        are neither tagged nor in the ancestry for a tagged image.

        :return:    list of image IDs in the repo
        :rtype:     list
        """
        path = self.IMAGES_PATH % self.name

        _logger.debug('retrieving image ids from remote registry')
        raw_data = self._get_single_path(path)
        return [item['id'] for item in raw_data]

    def get_tags(self):
        """
        Get a dictionary of tags from the upstream repo.

        :return:    a dictionary where keys are tag names, and values are either
                    full image IDs or abbreviated image IDs.
        :rtype:     dict
        """
        repo_name = self.name
        # this is a quirk of the docker registry API.
        if '/' not in repo_name:
            repo_name = 'library/' + repo_name

        path = self.TAGS_PATH % repo_name

        _logger.debug('retrieving tags from remote registry')
        raw_data = self._get_single_path(path)
        # raw_data will sometimes be a list of dicts, and sometimes just a dict,
        # depending on what version of the API we're talking to.
        if isinstance(raw_data, list):
            return dict((tag['name'], tag['layer']) for tag in raw_data)
        return raw_data

    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each
        in a directory whose name is the image ID.

        :param image_ids:   list of image IDs for which the ancestry file
                            should be retrieved
        :type  image_ids:   list

        :raises IOError:    if a download fails
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                os.mkdir(os.path.split(destination)[0])
            except OSError, e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)

        _logger.debug('retrieving ancestry files from remote registry')
        self.downloader.download(requests)
        if len(self.listener.failed_reports):
            raise IOError(self.listener.failed_reports[0].error_msg)
Example #31
0
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener
    so it can pass itself to the downloader library and receive the callbacks when downloads are
    complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to
        # PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation does
        # not support it
        # and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress
        report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading stage,
        and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return:             The sync report
        :rtype:              pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError will
            # happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso) for
                             iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that are not
        available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we should
                         retrieve from the feed_url. The second element of the tuple is a list of
                         Units that are available locally already, but are not currently associated
                         with the repository. The third element of the tuple is a list of Units that
                         represent the ISOs that we have in our local repo that were not found in
                         the remote repo.
        :rtype:          tuple
        """

        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                      for unit in existing_units])
        existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                  for unit in existing_units])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
Example #32
0
class V2Repository(object):
    """
    This class represents a Docker v2 repository.
    """
    API_VERSION_CHECK_PATH = '/v2/'
    LAYER_PATH = '/v2/{name}/blobs/{digest}'
    MANIFEST_PATH = '/v2/{name}/manifests/{reference}'
    TAGS_PATH = '/v2/{name}/tags/list'

    def __init__(self, name, download_config, registry_url, working_dir):
        """
        Initialize the V2Repository.

        :param name:            name of a docker repository
        :type  name:            basestring
        :param download_config: download configuration object
        :type  download_config: nectar.config.DownloaderConfig
        :param registry_url:    URL for the docker registry
        :type  registry_url:    basestring
        :param working_dir:     full path to the directory where files should
                                be saved
        :type  working_dir:     basestring
        """

        # Docker's registry aligns non-namespaced images to the library namespace.
        # if we have a docker registry image, and no namespace, add the library
        # namespace to the image name.

        if '/' not in name and re.search(r'registry[-,\w]*.docker.io', registry_url, re.IGNORECASE):
            self.name = "library/" + name
        else:
            self.name = name

        self.download_config = download_config
        self.registry_url = registry_url

        # Use basic auth information only for retrieving tokens from auth server.
        self.token_downloader = HTTPThreadedDownloader(self.download_config,
                                                       AggregatingEventListener())
        self.download_config.basic_auth_username = None
        self.download_config.basic_auth_password = None
        self.downloader = HTTPThreadedDownloader(self.download_config, AggregatingEventListener())
        self.working_dir = working_dir
        self.token = None

    def api_version_check(self):
        """
        Make a call to the registry URL's /v2/ API call to determine if the registry supports API
        v2.

        :return: True if the v2 API is found, else False
        :rtype:  bool
        """
        _logger.debug('Determining if the registry URL can do v2 of the Docker API.')

        try:
            headers, body = self._get_path(self.API_VERSION_CHECK_PATH)
        except IOError:
            return False

        try:
            version = headers['Docker-Distribution-API-Version']
            if version != "registry/2.0":
                return False
            _logger.debug(_('The docker registry is using API version: %(v)s') % {'v': version})
        except KeyError:
            # If the Docker-Distribution-API-Version header isn't present, we will assume that this
            # is a valid Docker 2.0 API server so that simple file-based webservers can serve as our
            # remote feed.
            pass

        return True

    def create_blob_download_request(self, digest):
        """
        Return a DownloadRequest instance for the given blob digest.
        It is desirable to download the blob files with a separate
        downloader (for progress tracking, etc), so we just create the download
        requests here and let them get processed elsewhere.

        :param digest:          digest of the docker blob you wish to download
        :type  digest:          basestring

        :return:    a download request instance
        :rtype:     nectar.request.DownloadRequest
        """
        path = self.LAYER_PATH.format(name=self.name, digest=digest)
        url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(url, os.path.join(self.working_dir, digest))
        return req

    def get_manifest(self, reference):
        """
        Get the manifest and its digest for the given reference.

        :param reference: The reference (tag or digest) of the Manifest you wish to retrieve.
        :type  reference: basestring
        :return:          A 2-tuple of the digest and the manifest, both basestrings
        :rtype:           tuple
        """
        path = self.MANIFEST_PATH.format(name=self.name, reference=reference)
        headers, manifest = self._get_path(path)

        digest_header = 'docker-content-digest'
        if digest_header in headers:
            expected_digest = headers[digest_header]
            # The digest is formatted as algorithm:sum, so let's ask our hasher to use the same
            # algorithm as we received in the headers.
            digest = models.Manifest.calculate_digest(manifest, expected_digest.split(':')[0])
            if digest != expected_digest:
                msg = _('The Manifest digest does not match the expected value. The remote '
                        'feed announced a digest of {e}, but the downloaded digest was {d}.')
                msg = msg.format(e=expected_digest, d=digest)
                raise IOError(msg)
        else:
            digest = models.Manifest.calculate_digest(manifest)
        return digest, manifest

    def get_tags(self):
        """
        Get a list of the available tags in the repository.

        :return: A list of basestrings of the available tags in the repository.
        :rtype:  list
        """
        path = self.TAGS_PATH.format(name=self.name)
        _logger.debug('retrieving tags from remote registry')
        try:
            headers, tags = self._get_path(path)
        except IOError as e:
            raise pulp_exceptions.PulpCodedException(error_code=error_codes.DKR1007,
                                                     repo=self.name,
                                                     registry=self.registry_url,
                                                     reason=str(e))
        return json.loads(tags)['tags']

    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers and
        the response body.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream registry
                     url.
        :type  path: basestring

        :return:     (headers, response body)
        :rtype:      tuple
        """
        url = urlparse.urljoin(self.registry_url, path)
        _logger.debug(_('Retrieving {0}'.format(url)))
        request = DownloadRequest(url, StringIO())

        if self.token:
            request.headers = token_util.update_auth_header(request.headers, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, attempt to get a token and try again
        if report.state == report.DOWNLOAD_FAILED:
            if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
                _logger.debug(_('Download unauthorized, attempting to retrieve a token.'))
                self.token = token_util.request_token(self.token_downloader, request,
                                                      report.headers)
                request.headers = token_util.update_auth_header(request.headers, self.token)
                report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            self._raise_path_error(report)

        return report.headers, report.destination.getvalue()

    @staticmethod
    def _raise_path_error(report):
        """
        Raise an exception with an appropriate error message.

        Specifically because docker hub responds with a 401 for repositories that don't exist, pulp
        cannot disambiguate Unauthorized vs. Not Found. This function tries to make an error message
        that is clear on that point.

        :param report:  download report
        :type  report:  nectar.report.DownloadReport

        :raises IOError:    always, with an appropriate message based on the report
        """
        if report.error_report.get('response_code') == httplib.UNAUTHORIZED:
            # docker hub returns 401 for repos that don't exist, so we cannot disambiguate.
            raise IOError(_('Unauthorized or Not Found'))
        else:
            raise IOError(report.error_msg)