Beispiel #1
0
    def _get_single_path(self, path):
        """
        Fetch one path from the upstream registry and decode the response body
        as json.

        :param path:    a full http path that will be urljoin'd onto the
                        upstream registry url.
        :type  path:    basestring

        :return:    whatever gets deserialized out of the response body's json
        :raises IOError:    if the download report is in the failed state
        """
        request = DownloadRequest(urlparse.urljoin(self.registry_url, path), StringIO())
        if path.endswith('/images'):
            # the docker index only returns an auth token when this header
            # is present on the request
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'

        report = self.downloader.download_one(request)
        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        body = report.destination.getvalue()
        return json.loads(body)
Beispiel #2
0
    def _get_path(self, path):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers and
        the response body.

        If the first attempt fails with a 401 response code, a token is requested and the download
        is retried once with the updated auth header. Any remaining failure is handed to
        ``self._raise_path_error``.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream registry
                     url.
        :type  path: basestring

        :return:     (headers, response body)
        :rtype:      tuple
        """
        url = urlparse.urljoin(self.registry_url, path)
        # Look the template up in the translation catalog first and format afterwards; formatting
        # before the lookup turns every distinct URL into a message id that is never translated.
        _logger.debug(_('Retrieving {0}').format(url))
        request = DownloadRequest(url, StringIO())

        if self.token:
            request.headers = token_util.update_auth_header(request.headers, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, attempt to get a token and try again
        unauthorized = (report.state == report.DOWNLOAD_FAILED and
                        report.error_report.get('response_code') == httplib.UNAUTHORIZED)
        if unauthorized:
            _logger.debug(_('Download unauthorized, attempting to retrieve a token.'))
            self.token = token_util.request_token(self.token_downloader, request,
                                                  report.headers)
            request.headers = token_util.update_auth_header(request.headers, self.token)
            report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            self._raise_path_error(report)

        return report.headers, report.destination.getvalue()
Beispiel #3
0
    def _get_path(self, path, headers=None):
        """
        Retrieve a single path within the upstream registry, and return a 2-tuple of the headers and
        the response body.

        A 401 on the first attempt triggers exactly one retry: through ``self.auth_downloader``
        when the ``www-authenticate`` header contains "Basic", otherwise through
        ``self.downloader`` after requesting a token. Any remaining failure is handed to
        ``self._raise_path_error``, except a 401 on the ``/v2/`` path, which is tolerated.

        :param path: a full http path to retrieve that will be urljoin'd to the upstream registry
                     url.
        :type  path: basestring
        :param headers: headers sent in the request
        :type headers:  dict

        :return:     (headers, response body)
        :rtype:      tuple
        :raises IOError: if a 401 response carries no ``www-authenticate`` header
        """
        url = urlparse.urljoin(self.registry_url, path)
        # Look the template up in the translation catalog first and format afterwards; formatting
        # before the lookup turns every distinct URL into a message id that is never translated.
        _logger.debug(_('Retrieving {0}').format(url))
        request = DownloadRequest(url, StringIO())
        request.headers = headers

        if self.token:
            request.headers = auth_util.update_token_auth_header(
                request.headers, self.token)

        report = self.downloader.download_one(request)

        # If the download was unauthorized, check report header, if basic auth is expected
        # retry with basic auth, otherwise attempt to get a token and try again
        unauthorized = (report.state == report.DOWNLOAD_FAILED and
                        report.error_report.get('response_code') == httplib.UNAUTHORIZED)
        if unauthorized:
            auth_header = report.headers.get('www-authenticate')
            if auth_header is None:
                raise IOError("401 responses are expected to "
                              "contain authentication information")
            elif "Basic" in auth_header:
                _logger.debug(
                    _('Download unauthorized, retrying with basic authentication'))
                report = self.auth_downloader.download_one(request)
            else:
                _logger.debug(
                    _('Download unauthorized, attempting to retrieve a token.'))
                self.token = auth_util.request_token(
                    self.auth_downloader, request, auth_header, self.name)
                request.headers = auth_util.update_token_auth_header(
                    request.headers, self.token)
                report = self.downloader.download_one(request)

        if report.state == report.DOWNLOAD_FAILED:
            # A registry may refuse access to the v2 endpoint while the token is still valid
            # for other endpoints, so a 401 on '/v2/' is not treated as an error.
            # see https://pulp.plan.io/issues/2643
            v2_unauthorized = (path == '/v2/' and
                               report.error_report.get('response_code') == httplib.UNAUTHORIZED)
            if not v2_unauthorized:
                self._raise_path_error(report)

        return report.headers, report.destination.getvalue()
    def test_request_cancel(self, mock_from_request):
        """
        Fetching a request already flagged as canceled calls ``download_canceled`` exactly once
        on the object returned by the patched ``from_request``.
        """
        request = DownloadRequest('http://fakeurl/robots.txt', mock.Mock())
        request.canceled = True

        self.downloader._fetch(request)

        cancel_hook = mock_from_request.return_value.download_canceled
        cancel_hook.assert_called_once_with()
Beispiel #5
0
    def _get_single_path(self, path):
        """
        Download a single path from the upstream registry and return the
        json-decoded response body.

        :param path:    a full http path to retrieve; it is urljoin'd to the
                        upstream registry url.
        :type  path:    basestring

        :return:    the result of deserializing the response body's json
        :raises IOError:    when the download report state is DOWNLOAD_FAILED
        """
        target_url = urlparse.urljoin(self.registry_url, path)
        req = DownloadRequest(target_url, StringIO())

        if path.endswith('/images'):
            # required by the docker index; tells it to hand back an auth token
            if req.headers is None:
                req.headers = {}
            req.headers[self.DOCKER_TOKEN_HEADER] = 'true'

        result = self.downloader.download_one(req)
        download_failed = result.state == result.DOWNLOAD_FAILED
        if download_failed:
            raise IOError(result.error_msg)

        self._parse_response_headers(result.headers)
        return json.loads(result.destination.getvalue())
    def test_request_cancel(self, mock_from_request):
        """
        A canceled request passed to ``_fetch`` results in a single ``download_canceled`` call
        on the patched ``from_request`` return value.
        """
        canceled_url = 'http://fakeurl/robots.txt'
        canceled_req = DownloadRequest(canceled_url, mock.Mock())
        canceled_req.canceled = True

        self.downloader._fetch(canceled_req)

        mock_from_request.return_value.download_canceled.assert_called_once_with()
Beispiel #7
0
    def test_copy_canceled_single_request(self, mock_canceled, mock_open):
        """
        Copying a request that is already marked canceled calls the patched cancel hook once
        and never writes through the patched ``open``.
        """
        file_downloader = local.LocalFileDownloader(DownloaderConfig())
        canceled_request = DownloadRequest('file://' + __file__, '/bar')
        canceled_request.canceled = True

        file_downloader._copy(canceled_request)

        # the cancel method must have been called on the report
        mock_canceled.assert_called_once_with()
        # and no write may have been attempted
        write_count = mock_open.return_value.write.call_count
        self.assertEqual(write_count, 0)
Beispiel #8
0
def _create_download_requests(content_units):
    """
    Build Nectar DownloadRequests, via the lazy catalog, for the files of the
    given content units. Files without a catalog entry are skipped.

    :param content_units: The content units to build a list of DownloadRequests for.
    :type  content_units: list of pulp.server.db.model.FileContentUnit

    :return: A list of DownloadRequests; each request carries a ``data``
             dict holding the unit's type id, the unit's id, and a dict
             (shared by all requests of that unit) mapping each temporary
             destination to its catalog entry and downloaded path.
    :rtype:  list of nectar.request.DownloadRequest
    """
    download_requests = []
    working_dir = get_working_directory()
    signing_key = Key.load(pulp_conf.get('authentication', 'rsa_key'))

    for unit in content_units:
        # One dict per unit; every request built for the unit references it.
        files_for_unit = {}
        unit_dir = os.path.join(working_dir, unit.id)

        for unit_file in unit.list_files():
            matching = LazyCatalogEntry.objects.filter(
                unit_id=unit.id, unit_type_id=unit.type_id, path=unit_file)
            entry = matching.order_by('revision').first()
            if entry is None:
                continue

            streamer_url = _get_streamer_url(entry, signing_key)
            destination = os.path.join(unit_dir, os.path.basename(entry.path))
            mkdir(unit_dir)
            files_for_unit[destination] = {
                CATALOG_ENTRY: entry,
                PATH_DOWNLOADED: None,
            }

            download_request = DownloadRequest(streamer_url, destination)
            # Keep only the id and type_id (not the unit itself) for memory reasons; the
            # unit can be reloaded once it has been downloaded successfully.
            download_request.data = {
                TYPE_ID: unit.type_id,
                UNIT_ID: unit.id,
                UNIT_FILES: files_for_unit,
            }
            download_requests.append(download_request)

    return download_requests
Beispiel #9
0
    def test_copy_canceled_single_request(self, mock_canceled, mock_open):
        """
        ``_copy`` on a canceled request triggers exactly one call to the patched cancel hook
        and zero writes on the patched ``open`` handle.
        """
        config = DownloaderConfig()
        downloader = local.LocalFileDownloader(config)
        req = DownloadRequest('file://' + __file__, '/bar')
        req.canceled = True

        downloader._copy(req)

        # cancel was reported once
        mock_canceled.assert_called_once_with()
        # nothing was written
        self.assertEqual(mock_open.return_value.write.call_count, 0)
Beispiel #10
0
def _create_download_requests(content_units):
    """
    Create the list of Nectar DownloadRequests for the given content units,
    looking each of their files up in the lazy catalog.

    :param content_units: The content units to build a list of DownloadRequests for.
    :type  content_units: list of pulp.server.db.model.FileContentUnit

    :return: A list of DownloadRequests; the ``data`` attribute of each
             request is a dict with the unit's type id, the unit's id, and
             the per-unit dict of files (temporary destination -> catalog
             entry and downloaded path).
    :rtype:  list of nectar.request.DownloadRequest
    """
    working_dir = get_working_directory()
    signing_key = Key.load(pulp_conf.get('authentication', 'rsa_key'))
    requests = []

    for content_unit in content_units:
        unit_id = content_unit.id
        # Shared by every request created for this unit.
        unit_files = {}
        unit_working_dir = os.path.join(working_dir, unit_id)

        for file_path in content_unit.list_files():
            catalog_entry = LazyCatalogEntry.objects.filter(
                unit_id=content_unit.id,
                unit_type_id=content_unit.type_id,
                path=file_path,
            ).order_by('revision').first()
            if catalog_entry is None:
                # no catalog entry for this file, so no request is created for it
                continue

            signed_url = _get_streamer_url(catalog_entry, signing_key)
            file_name = os.path.basename(catalog_entry.path)
            temporary_destination = os.path.join(unit_working_dir, file_name)
            mkdir(unit_working_dir)
            unit_files[temporary_destination] = {CATALOG_ENTRY: catalog_entry,
                                                 PATH_DOWNLOADED: None}

            request = DownloadRequest(signed_url, temporary_destination)
            # Only the id and type_id are retained (memory reasons); the unit is reloaded
            # once the download has succeeded.
            request.data = {TYPE_ID: content_unit.type_id,
                            UNIT_ID: content_unit.id,
                            UNIT_FILES: unit_files}
            requests.append(request)

    return requests
Beispiel #11
0
    def _download(self, urls):
        """
        Download files by URL.

        Encapsulates nectar details and provides a simplified method of downloading files.
        The downloader class is selected from ``URL_TO_DOWNLOADER`` by the scheme of the feed
        URL. Each succeeded report is logged at info level and each failed report at error level.

        :param urls: A list of tuples: (url, destination).  The *url* and *destination* are both
                     strings.  The *destination* is the fully qualified path to where the file is
                     to be downloaded.
        :type urls: list

        :return: The nectar reports.  Tuple of: (succeeded_reports, failed_reports)
        :rtype: tuple
        """
        feed_url = self.feed_url()
        nectar_config = importer_config_to_nectar_config(self.config.flatten())
        try:
            nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme]
            downloader = nectar_class(nectar_config)
            listener = DownloadListener(self, downloader)

            request_list = [DownloadRequest(url, destination) for url, destination in urls]
            downloader.download(request_list)
        finally:
            # Finalize the config on every exit path, not only after a download that
            # returned normally.
            nectar_config.finalize()

        for report in listener.succeeded_reports:
            _logger.info(FETCH_SUCCEEDED,
                         dict(url=report.url, dst=report.destination))
        for report in listener.failed_reports:
            _logger.error(FETCH_FAILED,
                          dict(url=report.url, msg=report.error_msg))

        return listener.succeeded_reports, listener.failed_reports
Beispiel #12
0
    def _parse_as_mirrorlist(self, feed):
        """
        Treats the provided feed as mirrorlist. Parses its content and extracts
        urls to sync.

        The downloaded content is split on newlines and every line is matched
        against a pattern that accepts http(s) URLs; the collected URLs are
        shuffled before being returned.

        :param feed: feed that should be treated as mirrorlist
        :type  feed: str

        :return:    list the URLs received from the mirrorlist
        :rtype:     list
        """
        url_file = StringIO()
        downloader = nectar_factory.create_downloader(feed, self.nectar_config)
        request = DownloadRequest(feed, url_file)
        downloader.download_one(request)
        url_file.seek(0)
        mirrorlist_lines = url_file.read().split('\n')

        # Due to the fact, that format of mirrorlist can be different, this regex
        # matches the cases when the url is not commented out and does not have any
        # punctuation characters in front.
        # A raw string keeps \w and \s as regex escapes rather than string escapes.
        pattern = re.compile(r"(^|^[\w\s=]+\s)((http(s)?)://.*)")

        repo_url = []
        for line in mirrorlist_lines:
            for match in pattern.finditer(line):
                repo_url.append(match.group(2))
        random.shuffle(repo_url)
        return repo_url
Beispiel #13
0
def get_distribution_file(feed, tmp_dir, nectar_config):
    """
    Download the pulp_distribution.xml into the temporary directory and return
    where it was saved, or None when the download did not succeed.

    :param feed:            URL to the repository
    :type  feed:            str
    :param tmp_dir:         full path to the temporary directory being used
    :type  tmp_dir:         str
    :param nectar_config:   download config to be used by nectar
    :type  nectar_config:   nectar.config.DownloaderConfig

    :return:        full path to distribution file on disk, or None if not found
    :rtype:         str or NoneType
    """
    distribution_name = constants.DISTRIBUTION_XML
    local_path = os.path.join(tmp_dir, distribution_name)
    remote_url = os.path.join(feed, distribution_name)

    request = DownloadRequest(remote_url, local_path)
    listener = AggregatingEventListener()
    downloader = nectar_factory.create_downloader(feed, nectar_config, listener)
    downloader.download([request])

    # exactly one succeeded report means the single request went through
    if len(listener.succeeded_reports) != 1:
        return None
    return local_path
Beispiel #14
0
def get_treefile(feed, tmp_dir, nectar_config):
    """
    Try each known treefile name in turn and return the on-disk path of the
    first one that downloads successfully, or None if none of them does.

    :param feed:            URL to the repository
    :type  feed:            str
    :param tmp_dir:         full path to the temporary directory being used
    :type  tmp_dir:         str
    :param nectar_config:   download config to be used by nectar
    :type  nectar_config:   nectar.config.DownloaderConfig

    :return:        full path to treefile on disk, or None if not found
    :rtype:         str or NoneType
    """
    for treefile_name in constants.TREE_INFO_LIST:
        local_path = os.path.join(tmp_dir, treefile_name)
        remote_url = os.path.join(feed, treefile_name)
        request = DownloadRequest(remote_url, local_path)

        listener = AggregatingEventListener()
        downloader = nectar_factory.create_downloader(feed, nectar_config, listener)
        downloader.download([request])

        if len(listener.succeeded_reports) != 1:
            continue
        # bz 1095829
        strip_treeinfo_repomd(local_path)
        return local_path

    return None
Beispiel #15
0
def file_to_download_request(file_dict, feed, storage_path):
    """
    Takes information about a file described in a treeinfo file and turns that
    into a download request suitable for use with nectar.

    The parent directory of the save path is created if it does not exist yet.

    :param file_dict:       dict containing keys 'relativepath', 'checksum',
                            and 'checksumtype'.
    :type  file_dict:       dict
    :param feed:            URL to the base of a repository
    :type  feed:            basestring
    :param storage_path:    full filesystem path to where the downloaded files
                            should be saved.
    :type  storage_path:    basestring

    :return:    new download request
    :rtype:     nectar.request.DownloadRequest
    :raises OSError: if the parent directory cannot be created for a reason
                     other than it already existing
    """
    import errno

    relative_path = file_dict['relativepath']
    savepath = os.path.join(storage_path, relative_path)

    # make directories such as "images". Attempt the creation and tolerate
    # "already exists" instead of checking first: a check-then-create sequence
    # can fail when something else creates the directory in between.
    try:
        os.makedirs(os.path.dirname(savepath))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    return DownloadRequest(
        os.path.join(feed, relative_path),
        savepath,
        file_dict,
    )
Beispiel #16
0
    def create_download_request(self, image_id, file_name, destination_dir):
        """
        Build (but do not execute) a DownloadRequest for one file of one image.

        The request is only created here so that the actual layer files can be
        downloaded by a separate downloader (for progress tracking, etc).
        ``self.add_auth_header`` is applied to the request before it is
        returned.

        :param image_id:        unique ID of a docker image
        :type  image_id:        basestring
        :param file_name:       name of the file, one of "ancestry", "json",
                                or "layer"
        :type  file_name:       basestring
        :param destination_dir: full path to the directory where file should
                                be saved
        :type  destination_dir: basestring

        :return:    a download request instance
        :rtype:     nectar.request.DownloadRequest
        """
        base_url = self.get_image_url()
        remote_path = '/v1/images/%s/%s' % (image_id, file_name)
        file_url = urlparse.urljoin(base_url, remote_path)
        local_path = os.path.join(destination_dir, file_name)

        request = DownloadRequest(file_url, local_path)
        self.add_auth_header(request)
        return request
Beispiel #17
0
    def download_metadata_files(self):
        """
        Download the remaining metadata files.

        Every entry of ``self.metadata`` gets its ``local_path`` set to the
        destination inside ``self.dst_dir`` and is downloaded, except known
        types whose name ends in ``_db``.

        :raises RuntimeError: if ``self.metadata`` is empty or unset
        :raises IOError:      with the first failed report's message if any
                              download failed
        """
        if not self.metadata:
            raise RuntimeError('%s has not been parsed' % REPOMD_FILE_NAME)

        pending_requests = []
        for name, info in self.metadata.iteritems():
            # we don't care about the sqlite files
            is_known_db = name.endswith('_db') and name in self.KNOWN_TYPES
            if is_known_db:
                continue

            relative_path = info['relative_path']
            source_url = self._url_modify(self.repo_url, path_append=relative_path)
            # only the last path component is used as the local file name
            local_path = os.path.join(self.dst_dir, relative_path.rsplit('/', 1)[-1])
            info['local_path'] = local_path

            pending_requests.append(DownloadRequest(source_url, local_path))

        self.downloader.download(pending_requests)

        failures = self.event_listener.failed_reports
        if failures:
            raise IOError(failures[0].error_msg)
Beispiel #18
0
    def process_main(self, item=None):
        """
        Build the download requests for every package whose SHA256 matches a unit
        that still needs downloading, and hand them to the download step.

        The requests are stored on ``step_download_units._downloads`` and the
        destination -> unit mapping on ``step_download_units.path_to_unit``.

        :param item: unused by this method
        """
        working_dir = os.path.join(self.get_working_dir())
        units_by_checksum = dict(
            (unit.checksum, unit)
            for unit in self.parent.step_local_units.units_to_download)
        repometa = self.parent.apt_repo_meta
        download_requests = []

        # upstream_url points to the dist itself, dists/stable; go up two
        # levels to reach the directory the package file names are joined to
        dist_url = repometa.upstream_url.rstrip('/')
        base_url = os.path.dirname(os.path.dirname(dist_url))

        download_step = self.parent.step_download_units
        download_step.path_to_unit = dict()

        for comp_arch in repometa.iter_component_arch_binaries():
            component_dir = os.path.join(working_dir, "packages", comp_arch.component)
            misc.mkdir(component_dir)

            for package in comp_arch.iter_packages():
                wanted_unit = units_by_checksum.get(package['SHA256'])
                if wanted_unit:
                    package_url = os.path.join(base_url, package['Filename'])
                    destination = os.path.join(component_dir, os.path.basename(package_url))
                    download_requests.append(DownloadRequest(package_url, destination))
                    download_step.path_to_unit[destination] = wanted_unit

        download_step._downloads = download_requests
    def test_fetch_with_timeout(self):
        """
        Test that the report state is failed and that the baseurl can be tried again.
        """

        def raise_timeout(*args, **kwargs):
            # stand-in for session.get that always raises Timeout
            raise Timeout()

        with mock.patch('nectar.downloaders.threaded._logger') as patched_logger:
            url = 'http://pulpproject.org/primary.xml'
            request = DownloadRequest(url, StringIO())

            first_session = threaded.build_session(self.config)
            first_session.get = raise_timeout
            first_report = self.downloader._fetch(request, first_session)

            self.assertEqual(first_report.state, first_report.DOWNLOAD_FAILED)
            self.assertNotIn('pulpproject.org', self.downloader.failed_netlocs)

            # a second fetch against the same host must still call get once
            second_session = threaded.build_session(self.config)
            second_session.get = mock.MagicMock()
            second_report = self.downloader._fetch(request, second_session)

            self.assertEqual(second_report.state, second_report.DOWNLOAD_FAILED)
            self.assertEqual(second_session.get.call_count, 1)

            logged_messages = [entry[1][0] for entry in patched_logger.mock_calls]
            self.assertIn(
                "Request Timeout - Connection with "
                "http://pulpproject.org/primary.xml timed out.",
                logged_messages)
Beispiel #20
0
    def get_ancestry(self, image_ids):
        """
        Retrieve the "ancestry" file for each provided image ID, and save each
        in a directory whose name is the image ID.

        For every image ID the per-image directory under ``self.working_dir``
        is created (an already existing directory is accepted) and a download
        request carrying the auth header is built.

        :param image_ids:   list of image IDs for which the ancestry file
                            should be retrieved
        :type  image_ids:   list

        :raises IOError:    if a download fails
        :raises OSError:    if a per-image directory cannot be created for a
                            reason other than it already existing
        """
        requests = []
        for image_id in image_ids:
            path = self.ANCESTRY_PATH % image_id
            url = urlparse.urljoin(self.get_image_url(), path)
            destination = os.path.join(self.working_dir, image_id, 'ancestry')
            try:
                os.mkdir(os.path.dirname(destination))
            except OSError as e:
                # it's ok if the directory already exists
                if e.errno != errno.EEXIST:
                    raise
            request = DownloadRequest(url, destination)
            self.add_auth_header(request)
            requests.append(request)
        # NOTE(review): the visible body ends here; nothing submits ``requests`` to a
        # downloader, although the docstring mentions download failures -- confirm
        # against the full source.
    def test_fetch_with_connection_error_badstatusline(self):
        """
        Test that the baseurl is tried again if ConnectionError reason BadStatusLine happened.
        """

        def raise_bad_status_line(*args, **kwargs):
            # a ConnectionError whose reason is a BadStatusLine
            raise ConnectionError('Connection aborted.',
                                  httplib.BadStatusLine("''", ))

        with mock.patch('nectar.downloaders.threaded._logger') as patched_logger:
            url = 'http://fakeurl/primary.xml'
            request = DownloadRequest(url, StringIO())
            self.session.get.side_effect = raise_bad_status_line

            self.downloader._fetch(request)

            # the initial attempt plus one retry
            self.assertEqual(self.session.get.call_count, 2)

            logged_messages = [entry[1][0] for entry in patched_logger.mock_calls]
            expected_messages = [
                'Attempting to connect to http://fakeurl/primary.xml.',
                'Download of http://fakeurl/primary.xml failed. Re-trying.',
                'Re-trying http://fakeurl/primary.xml due to remote server '
                'connection failure.',
                'Download of http://fakeurl/primary.xml failed. Re-trying.',
                'Download of http://fakeurl/primary.xml failed and reached '
                'maximum retries',
            ]

            self.assertEqual(expected_messages, logged_messages)
    def test_fetch_with_timeout(self):
        """
        Test that the report state is failed and that the baseurl can be tried again.
        """

        with mock.patch('nectar.downloaders.threaded._logger') as patched_logger:
            url = 'http://fakeurl/primary.xml'
            request = DownloadRequest(url, StringIO())
            self.session.get.side_effect = Timeout

            first_report = self.downloader._fetch(request)

            self.assertEqual(first_report.state, first_report.DOWNLOAD_FAILED)
            self.assertNotIn('fakeurl', self.downloader.failed_netlocs)

            # NOTE(review): this second session is built and given a mock ``get`` but is
            # never passed to ``_fetch`` -- confirm whether it is still needed.
            unused_session = threaded.build_session(self.config)
            unused_session.get = mock.MagicMock()
            second_report = self.downloader._fetch(request)

            self.assertEqual(second_report.state, second_report.DOWNLOAD_FAILED)
            # both fetches reached the shared session's get
            self.assertEqual(self.session.get.call_count, 2)

            logged_messages = [entry[1][0] for entry in patched_logger.mock_calls]
            self.assertIn(
                "Request Timeout - Connection with "
                "http://fakeurl/primary.xml timed out.",
                logged_messages)
Beispiel #23
0
    def retrieve_metadata(self, progress_report):
        """
        Download every metadata document required by the repository's configuration and
        return their contents. The progress report is updated as the downloads take place.

        :param progress_report: used to communicate the progress of this operation
        :type progress_report: pulp_puppet.importer.sync_progress.ProgressReport

        :return: list of JSON documents describing all modules to import
        :rtype: list
        :raises exceptions.FileRetrievalException: if the listener recorded a failed report
        """
        metadata_urls = self._create_metadata_download_urls()

        # The number of queries is known up front; nothing has finished yet
        progress_report.metadata_query_finished_count = 0
        progress_report.metadata_query_total_count = len(metadata_urls)

        listener = HTTPMetadataDownloadEventListener(progress_report)
        self.downloader = self._create_and_configure_downloader(listener)

        metadata_requests = []
        for metadata_url in metadata_urls:
            metadata_requests.append(DownloadRequest(metadata_url, StringIO()))

        # Exceptions raised by the download are not caught here; the caller
        # updates the progress report as necessary. The downloader is always
        # finalized and dropped afterwards.
        try:
            self.downloader.download(metadata_requests)
        finally:
            self.downloader.config.finalize()
            self.downloader = None

        # raises on the first failed report, if there is one
        for failed_report in listener.failed_reports:
            raise exceptions.FileRetrievalException(failed_report.error_msg)

        return [request.destination.getvalue() for request in metadata_requests]
    def test_fetch_with_connection_error(self):
        """
        Test that the report state is failed and that the baseurl is not tried again.
        """

        # stand-in for session.get that always raises requests' ConnectionError
        def connection_error(*args, **kwargs):
            raise ConnectionError()

        with mock.patch('nectar.downloaders.threaded._logger') as mock_logger:
            URL = 'http://fakeurl/primary.xml'
            req = DownloadRequest(URL, StringIO())
            self.session.get = connection_error
            try:
                report = self.downloader._fetch(req)
            except ConnectionError:
                # _fetch is expected to turn the error into a failed report, so an
                # escaping ConnectionError is the failure being reported here.
                raise AssertionError("ConnectionError should not be raised")

            self.assertEqual(report.state, report.DOWNLOAD_FAILED)
            self.assertIn('fakeurl', self.downloader.failed_netlocs)

            report2 = self.downloader._fetch(req)

            self.assertEqual(report2.state, report2.DOWNLOAD_FAILED)

            expected_log_message = "Connection Error - http://fakeurl/primary.xml " \
                                   "could not be reached."
            log_calls = [
                mock_call[1][0] for mock_call in mock_logger.mock_calls
            ]

            self.assertIn(expected_log_message, log_calls)
Beispiel #25
0
 def process_main(self, item=None):
     """
     For every release: verify it, build its repo metadata, get or create the
     release and component units (removing them from the clean-up lists), and
     collect the Packages download requests filtered by the selected
     components and architectures.

     The collected requests are stored on
     ``self.parent.step_download_Packages._downloads``.

     :param item: unused by this method
     """
     parent = self.parent
     releases = parent.releases
     components = parent.components
     architectures = parent.architectures
     all_requests = []

     for release in releases:
         self.verify_release(release)

         # generate repo_metas for Releases
         repometa = aptrepo.AptRepoMeta(
             release=open(parent.release_files[release], "rb"),
             upstream_url=parent.feed_urls[release])
         parent.apt_repo_meta[release] = repometa

         # get release unit
         codename = repometa.codename
         suite = repometa.release.get('suite')
         rel_unit = models.DebRelease.get_or_create_and_associate(
             parent.repo, codename, suite)
         parent.release_units[release] = rel_unit
         # Prevent this unit from being cleaned up
         try:
             parent.deb_releases_to_check.remove(rel_unit)
         except ValueError:
             pass

         # get release component units
         for component in repometa.components:
             short_name = component.split('/')[-1]
             if components is not None and short_name not in components:
                 continue
             comp_unit = models.DebComponent.get_or_create_and_associate(
                 parent.repo, rel_unit, component)
             parent.component_units[release][component] = comp_unit
             parent.component_packages[release][component] = []
             # Prevent this unit from being cleaned up
             try:
                 parent.deb_comps_to_check.remove(comp_unit)
             except ValueError:
                 pass

         # generate download requests for all relevant packages files
         release_requests = repometa.create_Packages_download_requests(
             self.get_working_dir())
         # Filter the requests by selected components and architectures
         if components:
             release_requests = [
                 req for req in release_requests
                 if req.data['component'].split('/')[-1] in components
             ]
         if architectures:
             release_requests = [
                 req for req in release_requests
                 if req.data['architecture'] in architectures
             ]
         parent.packages_urls[release] = set(
             [req.url for req in release_requests])
         all_requests.extend(release_requests)

     parent.step_download_Packages._downloads = [
         DownloadRequest(req.url, req.destination, data=req.data)
         for req in all_requests
     ]
Beispiel #26
0
    def test_downloads_property(self):
        """
        A DownloadStep given a generator of requests should expose them through its
        ``downloads`` property as a list.
        """
        urls = ['http://pulpproject.org']
        request_generator = (DownloadRequest(url, '/a/b/c') for url in urls)
        step = publish_step.DownloadStep('fake-step', downloads=request_generator)

        result = step.downloads

        # the generator must have been turned into a list holding the single request
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0], DownloadRequest)
Beispiel #27
0
    def _make_requests(self, data_file_names=DATA_FILES):
        """
        Build one download request per data file name.

        Each request's source is a ``file:/`` URL formed from DATA_DIR and the file name; its
        destination is the same file name underneath ``self.dest_dir``.

        :param data_file_names: names of the data files to build requests for
        :type  data_file_names: iterable of basestring

        :return: the requests, in the same order as data_file_names
        :rtype:  list of DownloadRequest
        """
        return [
            DownloadRequest('file:/' + os.path.join(DATA_DIR, name),
                            os.path.join(self.dest_dir, name))
            for name in data_file_names
        ]
Beispiel #28
0
    def test_common_link_canceled(self, mock_canceled):
        """
        Calling _common_link on a downloader that was already canceled should invoke the
        (patched) canceled hook exactly once.
        """
        config = DownloaderConfig()
        downloader = local.LocalFileDownloader(config)
        downloader.cancel()
        request = DownloadRequest('file://' + __file__, '/bar')
        link_method = mock.MagicMock()

        downloader._common_link(link_method, request)

        # the cancel method on the report must have been called once, without arguments
        mock_canceled.assert_called_once_with()
Beispiel #29
0
    def __init__(self, repo, conduit, config):
        """
        Assemble the child steps that make up a repository sync.

        Children are added in this order: Release file download, ParseReleaseStep, Packages
        files download, ParsePackagesStep, GetLocalUnitsStep, CreateRequestsUnitsToDownload,
        units download and SaveDownloadedUnits.

        :param repo:        repository to sync
        :type  repo:        pulp.plugins.model.Repository
        :param conduit:     sync conduit to use
        :type  conduit:     pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:      config object for the sync
        :type  config:      pulp.plugins.config.PluginCallConfiguration
        """
        super(RepoSync, self).__init__(step_type=constants.SYNC_STEP,
                                       repo=repo,
                                       conduit=conduit,
                                       config=config)
        self.description = _('Syncing Repository')
        self.apt_repo_meta = None
        self.available_units = None

        # https://pulp.plan.io/issues/2765 should remove the need to hardcode
        # the dist/component here
        feed = self.get_config().get('feed')
        self.feed_url = feed.strip('/') + '/dists/stable/'
        self.release_file = os.path.join(self.get_working_dir(), "Release")
        release_url = urlparse.urljoin(self.feed_url, 'Release')
        _logger.info("Downloading %s", release_url)

        # Release file: download, then parse
        release_download = publish_step.DownloadStep(
            constants.SYNC_STEP_RELEASE_DOWNLOAD,
            plugin_type=ids.TYPE_ID_IMPORTER,
            description=_('Retrieving metadata: release file'),
            downloads=[DownloadRequest(release_url, self.release_file)])
        self.add_child(release_download)
        release_parse = ParseReleaseStep(constants.SYNC_STEP_RELEASE_PARSE)
        self.add_child(release_parse)

        # Packages files: download, then parse. The download step is kept on self and is
        # created here without any downloads.
        self.step_download_Packages = publish_step.DownloadStep(
            constants.SYNC_STEP_PACKAGES_DOWNLOAD,
            plugin_type=ids.TYPE_ID_IMPORTER,
            description=_('Retrieving metadata: Packages files'))
        self.add_child(self.step_download_Packages)
        packages_parse = ParsePackagesStep(constants.SYNC_STEP_PACKAGES_PARSE)
        self.add_child(packages_parse)

        # local units step, followed by the step creating the unit download requests
        self.step_local_units = publish_step.GetLocalUnitsStep(
            importer_type=ids.TYPE_ID_IMPORTER)
        self.add_child(self.step_local_units)
        create_requests = CreateRequestsUnitsToDownload(
            constants.SYNC_STEP_UNITS_DOWNLOAD_REQUESTS)
        self.add_child(create_requests)

        # units: download, then save
        self.step_download_units = publish_step.DownloadStep(
            constants.SYNC_STEP_UNITS_DOWNLOAD,
            plugin_type=ids.TYPE_ID_IMPORTER,
            description=_('Retrieving units'))
        self.add_child(self.step_download_units)
        save_units = SaveDownloadedUnits(constants.SYNC_STEP_SAVE)
        self.add_child(save_units)
Beispiel #30
0
 def download_repomd(self):
     """
     Fetch the main repomd.xml file into the destination directory.

     :raises IOError: if the event listener holds any failed report after the download; the
                      message is the first failed report's error_msg
     """
     destination = os.path.join(self.dst_dir, REPOMD_FILE_NAME)
     source_url = urljoin(self.repo_url, REPOMD_URL_RELATIVE_PATH)
     self.downloader.download([DownloadRequest(source_url, destination)])

     failures = self.event_listener.failed_reports
     if not failures:
         return
     # only the first failure is surfaced to the caller
     raise IOError(failures[0].error_msg)
Beispiel #31
0
    def _get_single_path(self, path):
        """
        Fetch a single path and return its response body deserialized from json.

        The path is requested from ``self.endpoint`` when one is set (with an auth header
        added), otherwise from the upstream registry url.

        :param path:    a full http path to retrieve. Without an endpoint it is urljoin'd to
                        the upstream registry url; with an endpoint it becomes the path of a
                        URL on that endpoint, using the registry url's scheme.
        :type  path:    basestring

        :return:    whatever gets deserialized out of the response body's json
        :raises IOError: if the download report is in the DOWNLOAD_FAILED state
        """
        if not self.endpoint:
            url = urlparse.urljoin(self.registry_url, path)
        else:
            # if talking to docker hub, we'll get an endpoint specified, and then we'll have
            # to get tags from that endpoint instead of talking to the original feed URL.
            # We assume the same scheme that the registry URL used.
            scheme = urlparse.urlsplit(self.registry_url).scheme
            url = urlparse.urlunsplit(
                urlparse.SplitResult(scheme=scheme,
                                     netloc=self.endpoint,
                                     path=path,
                                     query=None,
                                     fragment=None))

        request = DownloadRequest(url, StringIO())
        if path.endswith('/images'):
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {self.DOCKER_TOKEN_HEADER: 'true'}
            else:
                request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        # endpoints require auth
        if self.endpoint:
            self.add_auth_header(request)

        report = self.downloader.download_one(request)
        if report.state == report.DOWNLOAD_FAILED:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        body = report.destination.getvalue()
        return json.loads(body)
    def test_calls_fetch(self, mock_fetch):
        """
        _download_one should call the (patched) fetch exactly once with the request and
        hand back the very report object that fetch returned.
        """
        request = DownloadRequest('http://foo', StringIO())
        expected_report = DownloadReport.from_download_request(request)
        mock_fetch.return_value = expected_report
        downloader = threaded.HTTPThreadedDownloader(DownloaderConfig())

        actual_report = downloader._download_one(request)

        self.assertEqual(mock_fetch.call_count, 1)
        self.assertIs(actual_report, expected_report)
        # first positional argument of the single fetch call
        positional_args = mock_fetch.call_args[0]
        self.assertIs(positional_args[0], request)
Beispiel #33
0
    def test_unsupported_url_scheme(self):
        """
        Handing an http:// URL to the local file downloader should produce one failed report
        and no succeeded reports.
        """
        listener = AggregatingEventListener()
        downloader = local.LocalFileDownloader(DownloaderConfig(use_sym_links=True), listener)
        destination = os.path.join(self.dest_dir, 'doesnt.even.matter')
        request = DownloadRequest('http://thiswontwork.com', destination)

        downloader.download([request])

        self.assertEqual(len(listener.succeeded_reports), 0)
        self.assertEqual(len(listener.failed_reports), 1)
Beispiel #34
0
    def test_source_bad_permissions(self):
        """
        Downloading from 'file://root/no' should produce one failed report and no succeeded
        reports (per the test name, the source is expected to be unreadable).
        """
        listener = AggregatingEventListener()
        downloader = local.LocalFileDownloader(DownloaderConfig(use_sym_links=True), listener)
        destination = os.path.join(self.dest_dir, 'doesnt.even.matter')
        request = DownloadRequest('file://root/no', destination)

        downloader.download([request])

        self.assertEqual(len(listener.succeeded_reports), 0)
        self.assertEqual(len(listener.failed_reports), 1)
Beispiel #35
0
 def __iter__(self):
     """
     Yield a download request for each item taken from the queue, stopping at the
     end-of-queue marker (None).

     Each yielded request uses the item's url and its request's destination, and carries the
     item's request as its data.

     :return: An iterable of: DownloadRequest.
     :rtype: iterable
     """
     item = self.queue.get()
     # None is the end-of-queue marker
     while item is not None:
         yield DownloadRequest(item.url, item.request.destination, data=item.request)
         item = self.queue.get()
Beispiel #36
0
    def _get_single_path(self, path):
        """
        Retrieve one path and return the json-decoded response body.

        When ``self.endpoint`` is set the request goes to that endpoint and gets an auth
        header; otherwise it goes to the upstream registry url.

        :param path:    a full http path to retrieve. It is urljoin'd to the upstream registry
                        url unless an endpoint is set, in which case it is used as the path of
                        a URL on the endpoint with the registry url's scheme.
        :type  path:    basestring

        :return:    whatever gets deserialized out of the response body's json
        :raises IOError: if the download report is in the DOWNLOAD_FAILED state
        """
        # if talking to docker hub, we'll get an endpoint specified, and then we'll have to get
        # tags from that endpoint instead of talking to the original feed URL.
        if self.endpoint:
            # we assume the same scheme that the registry URL used
            registry_scheme = urlparse.urlsplit(self.registry_url).scheme
            endpoint_parts = urlparse.SplitResult(scheme=registry_scheme, netloc=self.endpoint,
                                                  path=path, query=None, fragment=None)
        url = (urlparse.urlunsplit(endpoint_parts) if self.endpoint
               else urlparse.urljoin(self.registry_url, path))

        request = DownloadRequest(url, StringIO())
        wants_token = path.endswith('/images')
        if wants_token:
            # this is required by the docker index and indicates that it should
            # return an auth token
            if request.headers is None:
                request.headers = {}
            request.headers[self.DOCKER_TOKEN_HEADER] = 'true'
        # endpoints require auth
        if self.endpoint:
            self.add_auth_header(request)

        report = self.downloader.download_one(request)
        failed = report.state == report.DOWNLOAD_FAILED
        if failed:
            raise IOError(report.error_msg)

        self._parse_response_headers(report.headers)
        return json.loads(report.destination.getvalue())