Ejemplo n.º 1
0
    def test_exit_on_exception(self):
        """
        Test the exit_on_exception function.

        Every scenario raises an exception, passes the caught exception to
        exit_on_exception while sys.stderr is replaced by a StringIO, and
        checks that MyExitError is raised and that the expected text was
        written to the patched stderr.
        """
        # (text expected on stderr, factory building the exception to raise)
        # Factories are used so that the exceptions are still created while
        # sys.stderr is patched.
        scenarios = [
            # 'Bad Request' server errors: the message passed to the
            # HttpException is the one expected in the output
            ("source error message",
             lambda: exceptions.HttpException(
                 "source error message", RuntimeError("Bad Request"))),
            # certificate expiration message from server
            ("Certificate expired",
             lambda: exceptions.HttpException(
                 None,
                 RuntimeError("this message indicates certificate expired"))),
            # other server error messages
            ("other error message",
             lambda: exceptions.HttpException(
                 None, RuntimeError("other error message"))),
            # other non-server error messages
            ("non-server error message",
             lambda: RuntimeError("non-server error message")),
        ]

        for expected_message, make_error in scenarios:
            with patch('sys.stderr', new_callable=StringIO) as stderr_mock:
                try:
                    raise make_error()
                except Exception as caught:
                    # call from inside the handler so the exception is the
                    # one currently being handled, as in a real failure
                    with self.assertRaises(MyExitError):
                        exit_on_exception(caught)
                    # assertIn reports both operands when it fails
                    self.assertIn(expected_message, stderr_mock.getvalue())
Ejemplo n.º 2
0
    def send(self, request, **kwargs):
        """
        Send a given PreparedRequest through the parent session and check
        the status of the response. When retries are enabled for this
        session, the request is sent again after a delay for HTTP errors
        whose status code is in self.retry_errors and for connection errors
        with errno 104 (connection reset by peer).
        :param request: The prepared request to send.
        :param kwargs: Any keywords the adaptor for the request accepts.
        :return: the response
        :rtype: requests.Response
        :raises exceptions.HttpException: for errors that are not retried
        or when MAX_NUM_RETRIES attempts have all failed
        """
        # merge kwargs with env
        env_settings = self.merge_environment_settings(
            request.url,
            kwargs.get('proxies') or {},
            kwargs.get('stream'),
            kwargs.get('verify'),
            kwargs.get('cert'))
        kwargs.update(env_settings)

        # requests does not provide a default timeout, so set one when the
        # caller left it out or passed None
        if kwargs.get('timeout') is None:
            kwargs['timeout'] = 120

        parent = super(RetrySession, self)

        if not self.retry:
            # single attempt, no retry handling
            response = parent.send(request, **kwargs)
            self.check_status(response)
            return response

        # initial delay: at least DEFAULT_RETRY_DELAY, at most
        # MAX_RETRY_DELAY
        delay = min(max(self.start_delay, DEFAULT_RETRY_DELAY),
                    MAX_RETRY_DELAY)
        attempts = 0
        self.logger.debug(
            "Sending request {0}  to server.".format(request))
        last_error = None
        while attempts < MAX_NUM_RETRIES:
            try:
                response = parent.send(request, **kwargs)
                self.check_status(response)
                return response
            except requests.HTTPError as http_err:
                status = http_err.response.status_code
                if status not in self.retry_errors:
                    raise exceptions.HttpException(http_err)
                last_error = http_err
                if status == requests.codes.unavailable:
                    # is there a delay from the server (Retry-After)?
                    try:
                        server_delay = int(
                            http_err.response.headers.get(SERVICE_RETRY,
                                                          delay))
                        delay = min(server_delay, MAX_RETRY_DELAY)
                    except Exception:
                        # unusable header value: keep the current delay
                        pass

            except requests.ConnectionError as conn_err:
                last_error = conn_err
                # TODO not sure this appropriate for all the
                # 'Connection reset by peer' errors.
                # A post/put to vospace returns a document. If operation
                # succeeded but the error occurs during the response the
                # code below will send the request again. Since the
                # resource has been created/updated, a new error (bad
                # request maybe) might be issued by the server and that
                # can confuse the caller.
                # This code should probably deal with HTTP errors only
                # as the 503s above.
                self.logger.debug("Caught exception: {0}".format(conn_err))
                if conn_err.errno != 104:
                    # Only continue trying on a reset by peer error.
                    raise exceptions.HttpException(orig_exception=conn_err)

            # a retriable error occurred: wait, then double the delay (up
            # to MAX_RETRY_DELAY) for the next attempt
            self.logger.warning(
                "Resending request in {}s ...".format(delay))
            time.sleep(delay)
            attempts += 1
            delay = min(delay * 2, MAX_RETRY_DELAY)

        # all attempts failed; report the last error seen
        raise exceptions.HttpException(last_error)
Ejemplo n.º 3
0
    def put_file(self,
                 archive,
                 src_file,
                 archive_stream=None,
                 mime_type=None,
                 mime_encoding=None,
                 md5_check=True,
                 input_name=None):
        """
        Puts a file into the archive storage. The transfer end points
        returned by _get_transfer_protocols are tried in order and the
        method returns after the first successful upload.
        :param archive: name of the archive
        :param src_file: location of the source file
        :param archive_stream: specific archive stream
        :param mime_type: file mime type
        :param mime_encoding: file mime encoding
        :param md5_check: if True, calculate the md5sum before sending the file
        Server will fail if it receives a corrupted file.
        :param input_name: name to use in the archive overriding the actual
        file name.
        :raises AttributeError: if no archive is specified
        :raises exceptions.UnauthorizedException: if the subject of the data
        client is anonymous
        :raises exceptions.HttpException: if no URLs are available or the
        upload failed for every available URL
        """
        if not archive:
            raise AttributeError('No archive specified')

        # We actually raise an exception here since the web
        # service will normally respond with a 200 for an
        # anonymous put, though not provide any endpoints.
        if self._data_client.subject.anon:
            raise exceptions.UnauthorizedException(
                'Must be authenticated to put data')

        self.logger.debug('PUT {}/{}'.format(archive, src_file))

        headers = {}
        if md5_check:
            # calculate the md5sum
            md5sum = self._get_md5sum(src_file)
            headers['Content-MD5'] = md5sum
            logger.debug('Set Content-MD5: {}'.format(md5sum))

        if archive_stream is not None:
            headers[ARCHIVE_STREAM_HTTP_HEADER] = str(archive_stream)

        # MIME type: the caller's value wins, otherwise it is detected with
        # magic unless MAGIC_WARN is set, in which case the warning is
        # logged and no Content-Type header is sent
        if mime_type is not None:
            mtype = mime_type
        elif MAGIC_WARN:
            mtype = None
            logger.warning(MAGIC_WARN)
        else:
            m = magic.Magic(mime=True)
            mtype = m.from_file(os.path.realpath(src_file))
        if mtype is not None:
            headers['Content-Type'] = mtype
            logger.debug('Set MIME type: {}'.format(mtype))

        # MIME encoding: same approach as for the MIME type
        if mime_encoding:
            mencoding = mime_encoding
        elif MAGIC_WARN:
            mencoding = None
            # when mtype is not set the warning has already been logged
            # by the MIME type handling above
            if mtype:
                logger.warning(MAGIC_WARN)
        else:
            m = magic.Magic(mime_encoding=True)
            mencoding = m.from_file(os.path.realpath(src_file))
        if mencoding:
            headers['Content-Encoding'] = mencoding
            logger.debug('Set MIME encoding: {}'.format(mencoding))

        fname = input_name
        if not fname:
            fname = os.path.basename(src_file)

        protocols = self._get_transfer_protocols(archive,
                                                 fname,
                                                 is_get=False,
                                                 headers=headers)
        if len(protocols) == 0:
            raise exceptions.HttpException('No URLs available to put data to')

        # get the list of transfer points
        for protocol in protocols:
            url = protocol.endpoint
            if url is None:
                self.logger.debug('No endpoint for URI, skipping.')
                continue
            self.logger.debug('PUT to URL {}'.format(url))

            try:
                start = time.time()
                # the file is reopened for every attempt so each upload
                # starts reading from the beginning
                with open(src_file, 'rb') as f:
                    self._data_client.put(url, headers=headers, data=f)
                duration = time.time() - start
                stat_info = os.stat(src_file)
                # a coarse or adjusted clock can yield a duration that is
                # not positive; do not let the statistics turn a successful
                # upload into a ZeroDivisionError
                if duration > 0:
                    speed = round(
                        stat_info.st_size / 1024 / 1024 / duration, 2)
                else:
                    speed = float('inf')
                self.logger.info(
                    ('Successfully uploaded archive/file {}/{} in {}s '
                     '(avg. speed: {}MB/s)').format(
                         archive, src_file, round(duration, 2), speed))
                return
            except (exceptions.HttpException, socket.timeout) as e:
                # try a different URL
                self.logger.info(
                    'WARN: Cannot put data to {}. Exception: {}'.format(
                        url, e))
                # Logger.warn is a deprecated alias that was removed in
                # Python 3.13
                self.logger.warning('Try the next URL')
                continue
        raise exceptions.HttpException(
            'Unable to put data from any of the available URLs')
Ejemplo n.º 4
0
    def get_file(self,
                 archive,
                 file_name,
                 destination=None,
                 decompress=False,
                 cutout=None,
                 fhead=False,
                 wcs=False,
                 process_bytes=None,
                 md5_check=True):
        """
        Get a file from an archive. The entire file is delivered unless the
        cutout argument is present specifying a cutout to extract from file.
        The transfer end points returned by _get_transfer_protocols are
        tried in order and the method returns after the first successful
        download.
        :param archive: name of the archive containing the file
        :param file_name: the name of the file to retrieve
        :param destination: file to save data to (file, file_name, stream or
        anything that supports open/close and write). If None, the file is
        saved locally with the name provided by the content disposition
        received from the service.
        :param decompress: True to decompress the file (if applicable),
        False otherwise
        :param cutout: the arguments of cutout operation to be performed by
        the service
        :param fhead: download just the head of a fits file
        :param wcs: True if the wcs is to be included with the file
        :param process_bytes: function to be applied to the received bytes
        :param md5_check: if True, do md5sum check for corrupted data
        :return: nothing; the data is written to the destination
        :raises exceptions.HttpException: if no URLs are available, a
        DownloadError occurs while saving the data, or the download failed
        for every available URL
        """
        assert archive is not None
        assert file_name is not None
        params = {}
        if fhead:
            params['fhead'] = fhead
        if wcs:
            params['wcs'] = wcs
        if cutout:
            params['cutout'] = cutout
        file_info = '{}/{}'.format(archive, file_name)
        self.logger.debug('GET {}'.format(file_info))
        # TODO negotiate transfer even for fhead or wcs?
        protocols = self._get_transfer_protocols(archive,
                                                 file_name,
                                                 params=params)
        if len(protocols) == 0:
            raise exceptions.HttpException('No URLs available to access data')

        # get the list of transfer points
        for protocol in protocols:
            url = protocol.endpoint
            if url is None:
                self.logger.debug('No endpoint for URI, skipping.')
                continue
            self.logger.debug('GET from URL {}'.format(url))
            # Where the bytes of this attempt go. Kept separate from the
            # destination argument so that a file name derived from the
            # response of a failed attempt is not reused by the next one.
            target = destination
            try:
                response = self._data_client.get(url, stream=True)
                if target is None:
                    # get the destination name from the content disposition
                    content_disp = response.headers.get(
                        'content-disposition', '')
                    target = file_name
                    for content in content_disp.split():
                        if 'filename=' in content:
                            # drop the first 9 characters, the length of
                            # 'filename='
                            target = content[9:]
                            self.logger.debug(
                                'Content disposition destination name: {}'.
                                format(target))
                    if target.endswith('gz') and decompress:
                        target = os.path.splitext(target)[0]
                    # remove any path information and save the file in local
                    # directory
                    target = os.path.basename(target)
                    self.logger.info(
                        'Saved file in local directory under: {}'.format(
                            target))
                if hasattr(target, 'read'):
                    # file-like object supplied by the caller
                    self._save_bytes(response,
                                     target,
                                     file_info,
                                     decompress=decompress,
                                     process_bytes=process_bytes,
                                     md5_check=md5_check)
                else:
                    # got a destination name
                    with open(target, 'wb') as f:
                        self._save_bytes(response,
                                         f,
                                         file_info,
                                         decompress=decompress,
                                         process_bytes=process_bytes,
                                         md5_check=md5_check)
                return
            except (exceptions.HttpException, socket.timeout) as e:
                # try a different URL
                self.logger.info(
                    'WARN: Cannot retrieve data from {}. Exception: {}'.format(
                        url, e))
                # Logger.warn is a deprecated alias that was removed in
                # Python 3.13
                self.logger.warning('Try the next URL')
                continue
            except DownloadError as e:
                if target is not None and not hasattr(target, 'read'):
                    # try to cleanup the corrupted file
                    try:
                        os.unlink(target)
                    except Exception:
                        # nothing we can do
                        pass
                raise exceptions.HttpException(str(e))
        raise exceptions.HttpException(
            'Unable to download data from any of the available URLs')
Ejemplo n.º 5
0
    def test_get_reg(self, get_mock, file_mock, file_modtime_mock):
        """
        Tests the registry part of WsCapabilities in three situations:
        registry content read from the server, read from the cache file, and
        read from an outdated cache file when the server request fails.
        """
        service = 'myservice'
        resource_id = 'ivo://canfar.phys.uvic.ca/{}'.format(service)
        # layout shared by the server response and the cache file content
        registry_template = ('#test content\n {} = {} \n'
                             'ivo://some.provider/service = '
                             'http://providerurl.test/service')
        registry_path = os.path.join(ws.CACHE_LOCATION, ws.REGISTRY_FILE)

        # test when registry information is read from the server
        # (cache is outdated)
        server_cap_url = 'http://www.canfar.net/myservice'
        server_content = registry_template.format(resource_id, server_cap_url)
        get_mock.return_value = Mock(text=server_content)
        # set the modified time of the cache file to 0 to make sure the info
        # is retrieved from server
        file_modtime_mock.return_value = 0
        # test anonymous access
        file_mock.write = Mock()
        client = Mock(resource_id=resource_id)
        capabilities = ws.WsCapabilities(client)
        self.assertEqual(registry_path, capabilities.reg_file)
        expected_caps_file = os.path.join(
            ws.CACHE_LOCATION, 'canfar.phys.uvic.ca', '.{}'.format(service))
        self.assertEqual(expected_caps_file, capabilities.caps_file)
        self.assertEqual(server_cap_url,
                         capabilities._get_capability_url())
        # the content received from the server is written to the cache file
        file_mock.assert_called_once_with(registry_path, 'w')
        # TODO not sure why need to access write this way
        file_mock().__enter__.return_value.write.assert_called_once_with(
            server_content)

        # test when registry information is retrieved from the cache file
        get_mock.reset_mock()
        get_mock.return_value = None
        file_modtime_mock.reset_mock()
        file_mock.reset_mock()
        cached_cap_url = 'http://www.canfar.net/myservice2'
        cached_content = registry_template.format(resource_id, cached_cap_url)
        # a cache file modified just now
        file_modtime_mock.return_value = time.time()
        file_mock().__enter__.return_value.read.return_value = cached_content
        capabilities = ws.WsCapabilities(client)
        self.assertEqual(cached_cap_url,
                         capabilities._get_capability_url())

        # test when registry information is outdated but there are
        # errors retrieving it from the CADC registry
        # so in the end go back and use the cache version
        file_modtime_mock.reset_mock()
        file_mock.reset_mock()
        file_modtime_mock.return_value = 0
        file_mock().__enter__.return_value.read.return_value = cached_content
        get_mock.side_effect = [exceptions.HttpException()]
        client.get.side_effect = [exceptions.HttpException]
        capabilities = ws.WsCapabilities(client)
        self.assertEqual(cached_cap_url,
                         capabilities._get_capability_url())