Beispiel #1
0
    def __init__(self,
                 schema_validate=False,
                 chunk_size=1024,
                 *args,
                 **kwargs):
        """Set up a client for the data web service.

        schema_validate -- handed to TransferReader as its validate flag
                           (validate XML transfer documents if True)
        chunk_size -- stored on the instance; buffer size for get file
                      streaming

        All remaining positional and keyword arguments are forwarded
        unchanged to the parent class constructor.
        """

        super(DataClient, self).__init__(*args, **kwargs)

        self.transfer_reader = TransferReader(validate=schema_validate)
        self.transfer_writer = TransferWriter()

        self.chunk_size = chunk_size

        # Everything for this service lives below <base_url>/data.
        #   base_url      -> .../data[/auth]/transfer  (transfer requests)
        #   head_base_url -> .../data/auth or .../data/pub  (HEAD requests)
        data_url = self.base_url + '/data'
        if self.basic_auth is None:
            self.head_base_url = data_url + '/pub'
            self.base_url = data_url + '/transfer'
        else:
            self.head_base_url = data_url + '/auth'
            self.base_url = data_url + '/auth' + '/transfer'
Beispiel #2
0
    def __init__(self, schema_validate=False, chunk_size=1024, *args, **kwargs):
        """Construct the data service client.

        schema_validate -- if True the TransferReader validates XML
                           transfer documents
        chunk_size -- buffer size for get file streaming

        Extra positional/keyword arguments go straight to the parent
        class constructor.
        """

        super(DataClient, self).__init__(*args, **kwargs)

        self.transfer_reader = TransferReader(validate=schema_validate)
        self.transfer_writer = TransferWriter()

        self.chunk_size = chunk_size

        # Derive the two service URLs from the base URL set by the parent:
        # one for transfer requests, one for HEAD requests (the latter is
        # */data/auth when basic_auth is set, */data/pub otherwise).
        root = self.base_url + '/data'
        authenticated = self.basic_auth is not None

        self.head_base_url = root + ('/auth' if authenticated else '/pub')

        if authenticated:
            root = root + '/auth'
        self.base_url = root + '/transfer'
    def test_roundtrip_get(self):
        """Write a get-direction transfer to XML and read it back.

        The document is read with schema validation enabled, and the
        target, direction, properties and each protocol's uri/endpoint
        must survive the round trip.
        """
        tran = Transfer(test_target_good,
                        test_dir_get,
                        protocols=[
                            Protocol(DIRECTION_PROTOCOL_MAP['pullFromVoSpace'],
                                     endpoint='http://somewhere')
                        ],
                        properties={
                            'LENGTH': '1234',
                            'uri=ivo://ivoa.net/vospace/core#quota': '100'
                        },
                        version=VOSPACE_21)

        xml_str = TransferWriter().write(tran)
        tran2 = TransferReader(validate=True).read(xml_str)

        self.assertEqual(tran.target, tran2.target, 'Wrong target.')
        self.assertEqual(tran.direction, tran2.direction, 'Wrong direction.')
        self.assertEqual(tran.properties, tran2.properties,
                         'Wrong properties.')
        self.assertEqual(len(tran.protocols), len(tran2.protocols),
                         'Wrong number of protocols.')

        # Compare the written protocol (p1) against the one read back (p2).
        # The lengths were asserted equal above, so zip() drops nothing.
        for i, (p1, p2) in enumerate(zip(tran.protocols, tran2.protocols)):
            self.assertEqual(p1.uri, p2.uri, 'Wrong uri, protocol %i' % i)
            self.assertEqual(p1.endpoint, p2.endpoint,
                             'Wrong endpoint, protocol %i' % i)
    def test_validation(self):
        """Check that schema validation is applied only when requested.

        A VOSPACE_20 document must read cleanly with validation on.  A
        VOSPACE_21 document with an extra <junk> child element must read
        with a default TransferReader but raise etree.DocumentInvalid
        when validate=True.
        """
        # VOSPACE_20: reading with validation enabled must not raise
        v20_transfer = Transfer(test_target_good, test_dir_put,
                                version=VOSPACE_20)
        v20_xml = TransferWriter().write(v20_transfer)
        TransferReader(validate=True).read(v20_xml)

        # VOSPACE_21
        v21_transfer = Transfer(test_target_good,
                                test_dir_put,
                                properties={'LENGTH': '1234'},
                                version=VOSPACE_21)
        v21_xml = TransferWriter().write(v21_transfer)

        # Corrupt the document with an element that schema validation
        # should catch
        root = etree.fromstring(v21_xml)
        etree.SubElement(root, 'junk')
        corrupted_xml = etree.tostring(root, encoding='UTF-8',
                                       pretty_print=True)

        # Validation is off by default, so this read must not raise
        TransferReader().read(corrupted_xml)

        # With validation switched on the same document must be rejected
        with self.assertRaises(etree.DocumentInvalid):
            TransferReader(validate=True).read(corrupted_xml)
    def test_roundtrip_put(self):
        """Write a put-direction transfer to XML and read it back.

        The document is read with schema validation enabled, and the
        target, direction, properties and each protocol's uri/endpoint
        must survive the round trip.
        """
        tran = Transfer(test_target_good,
                        test_dir_put,
                        properties={'LENGTH': '1234'},
                        version=VOSPACE_21)
        xml_str = TransferWriter().write(tran)
        tran2 = TransferReader(validate=True).read(xml_str)

        self.assertEqual(tran.target, tran2.target, 'Wrong target.')
        self.assertEqual(tran.direction, tran2.direction, 'Wrong direction.')
        self.assertEqual(tran.properties, tran2.properties,
                         'Wrong properties.')
        self.assertEqual(len(tran.protocols), len(tran2.protocols),
                         'Wrong number of protocols.')

        # Compare the written protocol (p1) against the one read back (p2).
        # The lengths were asserted equal above, so zip() drops nothing.
        for i, (p1, p2) in enumerate(zip(tran.protocols, tran2.protocols)):
            self.assertEqual(p1.uri, p2.uri, 'Wrong uri, protocol %i' % i)
            self.assertEqual(p1.endpoint, p2.endpoint,
                             'Wrong endpoint, protocol %i' % i)
Beispiel #6
0
class DataClient(BaseClient):
    """Class for interacting with the data web service"""

    def __init__(self, schema_validate=False, chunk_size=1024, *args, **kwargs):
        """Data service client constructor.

        schema_validate -- validate XML transfer document if True
        chunk_size -- buffer size for get file streaming

        Any further positional/keyword arguments are passed on to the
        BaseClient constructor.
        """

        super(DataClient, self).__init__(*args, **kwargs)

        self.transfer_reader = TransferReader(validate=schema_validate)
        self.transfer_writer = TransferWriter()

        self.chunk_size = chunk_size

        # Specific base_url for data webservice, and a separate base URL
        # that will be used for HEAD requests (either */data/auth or
        # */data/pub).
        self.base_url = self.base_url + '/data'
        self.head_base_url = self.base_url
        if self.basic_auth is not None:
            self.base_url = self.base_url + '/auth'
            self.head_base_url = self.head_base_url + '/auth'
        else:
            self.head_base_url = self.head_base_url + '/pub'
        self.base_url = self.base_url + '/transfer'

    def _make_logger(self):
        """ Logger for data client """
        self.logger = logging.getLogger('dataclient')

    def transfer_file(self, localfile, uri=None, filename=None, is_put=False,
                      archive=None, stream=None):
        """ Copy file to/from data/vos web service

        localfile -- file name on disk
        uri       -- URI for remote file
        is_put    -- True for put, False for get.
        stream    -- Optional stream name for data web service transfers

        If uri is not specified it can be generated for data web service
        transfers given an archive and filename:

        filename  -- remote name for file (if unspecified use localfile)
        archive   -- Internally create URI from archive and file name

        Raises ValueError if neither uri nor archive is given,
        UnauthorizedException for a put when self.is_authorized is false,
        and TransferException if no endpoint gives a successful transfer.
        Errors from opening localfile, from the POST and from parsing the
        response are not caught here.
        """

        if uri is not None:
            # User provides the uri
            uri_transfer = uri
        elif archive is not None:
            # archive is used to form a data web service uri; without an
            # explicit filename the last '/'-separated part of localfile
            # is used as the remote name
            if filename is None:
                filename = localfile.split('/')[-1]
            uri_transfer = ('ad:%s/' % archive) + filename
        else:
            raise ValueError('Must specify either uri or archive')

        # Direction-dependent setup
        if is_put:
            if not self.is_authorized:
                # We actually raise an exception here since the web
                # service will normally respond with a 200 for an
                # anonymous put, though not provide any endpoints.
                raise UnauthorizedException(
                    "Unauthorized clients cannot put files.")
            dir_str = 'to'
            tran = Transfer(uri_transfer, 'pushToVoSpace')
            mode = 'rb'
        else:
            dir_str = 'from'
            tran = Transfer(uri_transfer, 'pullFromVoSpace')
            mode = 'wb'

        # If a stream is supplied it goes in an Http header
        headers = None if stream is None else {'X-CADC-Stream': stream}

        success = False

        # A single handle is used for the whole negotiation and every
        # attempt; the with-block closes it on all exit paths, including
        # a failing POST or an unparsable response.
        with open(localfile, mode) as f:
            self.logger.debug("Using service %s to transfer %s %s %s (%s)" %
                              (self.base_url, localfile, dir_str,
                               uri_transfer, str(stream)))

            # obtain list of endpoints by sending a transfer document and
            # looking at the URLs in the returned document
            request_xml = self.transfer_writer.write(tran)
            response = self._upload_xml(self.base_url, request_xml, 'POST',
                                        headers=headers)
            response_str = response.text.encode('utf-8')

            self.logger.debug("POST had %i redirects" % len(response.history))
            self.logger.debug("Response code: %i, URL: %s" %
                              (response.status_code, response.url))
            self.logger.debug("Full XML response:\n%s" % response_str)

            tran = self.transfer_reader.read(response_str)

            # Try transfering to/from endpoint until one works
            for protocol in tran.protocols:
                url = protocol.endpoint
                if url is None:
                    self.logger.debug('No endpoint for URI, skipping.')
                    continue

                self.logger.debug('Transferring %s %s' % (dir_str, url))

                try:
                    if is_put:
                        r = requests.put(url, data=f)
                    else:
                        self.logger.debug('Get streaming chunk_size is %i' %
                                          self.chunk_size)
                        r = requests.get(url, stream=True)
                        # Discard anything a previous failed attempt wrote
                        f.seek(0)
                        f.truncate()
                        for chunk in r.iter_content(
                                chunk_size=self.chunk_size):
                            if chunk:
                                f.write(chunk)
                        # Flush once so that write errors surface inside
                        # this attempt rather than at close time
                        f.flush()
                    # NOTE(review): for a get the body is written before
                    # the response is checked, so an error body can end up
                    # in localfile -- confirm whether check_exception can
                    # safely run before streaming.
                    self.check_exception(r)
                    success = True
                    break
                except Exception as e:
                    # Reset to start of file. Try next endpoint
                    f.seek(0)
                    self.logger.warning('Transfer %s %s %s failed:\n%s' %
                                        (str(localfile), str(dir_str),
                                         str(uri_transfer), str(e)))

        if not success:
            msg = 'Failed to transfer %s %s %s. ' % (str(localfile),
                                                     str(dir_str),
                                                     str(uri_transfer))
            msg = msg + 'File missing or user lacks permission?'
            self.logger.error(msg)
            raise TransferException(msg)

        # Do a HEAD to compare md5sums?

    def data_info(self, archive, filename):
        """ Perform a HEAD with data web service for minimal information

        archive  -- archive name, first path element after head_base_url
        filename -- file name within the archive

        Returns the result of self._head_request for that URL.
        """

        url = self.head_base_url + '/%s/%s' % (archive, filename)
        self.logger.debug('Performing HEAD request on %s' % (url))
        r = self._head_request(url)

        return r
Beispiel #7
0
class DataClient(BaseClient):
    """Class for interacting with the data web service"""
    def __init__(self,
                 schema_validate=False,
                 chunk_size=1024,
                 *args,
                 **kwargs):
        """Data service client constructor.

        schema_validate -- validate XML transfer document if True
        chunk_size -- buffer size for get file streaming

        Any further positional/keyword arguments are passed on to the
        BaseClient constructor.
        """

        super(DataClient, self).__init__(*args, **kwargs)

        self.transfer_reader = TransferReader(validate=schema_validate)
        self.transfer_writer = TransferWriter()

        self.chunk_size = chunk_size

        # Specific base_url for data webservice, and a separate base URL
        # that will be used for HEAD requests (either */data/auth or
        # */data/pub).
        self.base_url = self.base_url + '/data'
        self.head_base_url = self.base_url
        if self.basic_auth is not None:
            self.base_url = self.base_url + '/auth'
            self.head_base_url = self.head_base_url + '/auth'
        else:
            self.head_base_url = self.head_base_url + '/pub'
        self.base_url = self.base_url + '/transfer'

    def _make_logger(self):
        """ Logger for data client """
        self.logger = logging.getLogger('dataclient')

    def transfer_file(self,
                      localfile,
                      uri=None,
                      filename=None,
                      is_put=False,
                      archive=None,
                      stream=None):
        """ Copy file to/from data/vos web service

        localfile -- file name on disk
        uri       -- URI for remote file
        is_put    -- True for put, False for get.
        stream    -- Optional stream name for data web service transfers

        If uri is not specified it can be generated for data web service
        transfers given an archive and filename:

        filename  -- remote name for file (if unspecified use localfile)
        archive   -- Internally create URI from archive and file name

        Raises ValueError if neither uri nor archive is given,
        UnauthorizedException for a put when self.is_authorized is false,
        and TransferException if no endpoint gives a successful transfer.
        Errors from opening localfile, from the POST and from parsing the
        response are not caught here.
        """

        if uri is not None:
            # User provides the uri
            uri_transfer = uri
        elif archive is not None:
            # archive is used to form a data web service uri; without an
            # explicit filename the last '/'-separated part of localfile
            # is used as the remote name
            if filename is None:
                filename = localfile.split('/')[-1]
            uri_transfer = ('ad:%s/' % archive) + filename
        else:
            raise ValueError('Must specify either uri or archive')

        # Direction-dependent setup
        if is_put:
            if not self.is_authorized:
                # We actually raise an exception here since the web
                # service will normally respond with a 200 for an
                # anonymous put, though not provide any endpoints.
                raise UnauthorizedException(
                    "Unauthorized clients cannot put files.")
            dir_str = 'to'
            tran = Transfer(uri_transfer, 'pushToVoSpace')
            mode = 'rb'
        else:
            dir_str = 'from'
            tran = Transfer(uri_transfer, 'pullFromVoSpace')
            mode = 'wb'

        # If a stream is supplied it goes in an Http header
        if stream is not None:
            headers = {'X-CADC-Stream': stream}
        else:
            headers = None

        success = False

        # One handle serves the negotiation and every transfer attempt.
        # The with-block guarantees it is closed even when the POST or
        # the response parsing raises.
        with open(localfile, mode) as f:
            self.logger.debug(
                "Using service %s to transfer %s %s %s (%s)" %
                (self.base_url, localfile, dir_str, uri_transfer,
                 str(stream)))

            # obtain list of endpoints by sending a transfer document and
            # looking at the URLs in the returned document
            request_xml = self.transfer_writer.write(tran)
            response = self._upload_xml(self.base_url,
                                        request_xml,
                                        'POST',
                                        headers=headers)
            response_str = response.text.encode('utf-8')

            self.logger.debug("POST had %i redirects" % len(response.history))
            self.logger.debug("Response code: %i, URL: %s" %
                              (response.status_code, response.url))
            self.logger.debug("Full XML response:\n%s" % response_str)

            tran = self.transfer_reader.read(response_str)

            # Try transfering to/from endpoint until one works
            for protocol in tran.protocols:
                url = protocol.endpoint
                if url is None:
                    self.logger.debug('No endpoint for URI, skipping.')
                    continue

                self.logger.debug('Transferring %s %s' % (dir_str, url))

                try:
                    if is_put:
                        r = requests.put(url, data=f)
                    else:
                        self.logger.debug('Get streaming chunk_size is %i' %
                                          self.chunk_size)
                        r = requests.get(url, stream=True)
                        # Start from an empty file on every attempt so a
                        # partial earlier download cannot leak through
                        f.seek(0)
                        f.truncate()
                        for chunk in r.iter_content(
                                chunk_size=self.chunk_size):
                            if chunk:
                                f.write(chunk)
                        # Single flush after the body so write errors are
                        # handled as a failed attempt
                        f.flush()
                    # NOTE(review): for a get the body is written before
                    # the response is checked, so an error body can end up
                    # in localfile -- confirm whether check_exception can
                    # safely run before streaming.
                    self.check_exception(r)
                    success = True
                    break
                except Exception as e:
                    # Reset to start of file. Try next endpoint
                    f.seek(0)
                    self.logger.warning(
                        'Transfer %s %s %s failed:\n%s' %
                        (str(localfile), str(dir_str), str(uri_transfer),
                         str(e)))

        if not success:
            msg = 'Failed to transfer %s %s %s. ' % (
                str(localfile), str(dir_str), str(uri_transfer))
            msg = msg + 'File missing or user lacks permission?'
            self.logger.error(msg)
            raise TransferException(msg)

        # Do a HEAD to compare md5sums?

    def data_info(self, archive, filename):
        """ Perform a HEAD with data web service for minimal information

        archive  -- archive name, first path element after head_base_url
        filename -- file name within the archive

        Returns the result of self._head_request for that URL.
        """

        url = self.head_base_url + '/%s/%s' % (archive, filename)
        self.logger.debug('Performing HEAD request on %s' % (url))
        r = self._head_request(url)

        return r