def __init__(self, schema_validate=False, chunk_size=1024, *args, **kwargs):
    """Set up a client for the data web service.

    schema_validate -- passed to TransferReader as its ``validate`` flag
    chunk_size -- stored as ``self.chunk_size`` (buffer size for get
                  file streaming)

    Remaining positional and keyword arguments are forwarded to the
    parent constructor.
    """
    super(DataClient, self).__init__(*args, **kwargs)

    self.transfer_reader = TransferReader(validate=schema_validate)
    self.transfer_writer = TransferWriter()
    self.chunk_size = chunk_size

    # Everything for this service lives below <base>/data.
    data_root = self.base_url + '/data'

    if self.basic_auth is None:
        # Anonymous: HEAD requests go to */data/pub, transfers are
        # negotiated directly under */data.
        transfer_root = data_root
        self.head_base_url = data_root + '/pub'
    else:
        # Authenticated: both HEAD requests and transfers go through
        # */data/auth.
        transfer_root = data_root + '/auth'
        self.head_base_url = transfer_root

    self.base_url = transfer_root + '/transfer'
def test_roundtrip_get(self):
    """Write a get transfer to XML, read it back, and compare the two.

    The transfer carries one protocol with an endpoint and two
    properties. After a write/validated-read round trip the target,
    direction, properties and every protocol (uri and endpoint) must
    match the original.
    """
    tran = Transfer(test_target_good, test_dir_get,
                    protocols=[
                        Protocol(DIRECTION_PROTOCOL_MAP['pullFromVoSpace'],
                                 endpoint='http://somewhere')
                    ],
                    properties={
                        'LENGTH': '1234',
                        'uri=ivo://ivoa.net/vospace/core#quota': '100'
                    },
                    version=VOSPACE_21)

    xml_str = TransferWriter().write(tran)
    tran2 = TransferReader(validate=True).read(xml_str)

    self.assertEqual(tran.target, tran2.target, 'Wrong target.')
    self.assertEqual(tran.direction, tran2.direction, 'Wrong direction.')
    self.assertEqual(tran.properties, tran2.properties,
                     'Wrong properties.')
    self.assertEqual(len(tran.protocols), len(tran2.protocols),
                     'Wrong number of protocols.')

    # Compare each original protocol against its round-tripped
    # counterpart. The lengths were asserted equal above, so zip()
    # does not silently drop anything.
    for i, (p1, p2) in enumerate(zip(tran.protocols, tran2.protocols)):
        self.assertEqual(p1.uri, p2.uri, 'Wrong uri, protocol %i' % i)
        self.assertEqual(p1.endpoint, p2.endpoint,
                         'Wrong endpoint, protocol %i' % i)
def test_validation(self):
    """Check that schema validation on read accepts good documents and
    rejects a corrupted one only when validation is switched on.
    """
    # VOSPACE_20
    transfer_v20 = Transfer(test_target_good, test_dir_put,
                            version=VOSPACE_20)
    doc_v20 = TransferWriter().write(transfer_v20)
    TransferReader(validate=True).read(doc_v20)

    # VOSPACE_21
    transfer_v21 = Transfer(test_target_good, test_dir_put,
                            properties={'LENGTH': '1234'},
                            version=VOSPACE_21)
    doc_v21 = TransferWriter().write(transfer_v21)

    # introduce an error that schema validation should catch
    root = etree.fromstring(doc_v21)
    etree.SubElement(root, 'junk')
    broken_doc = etree.tostring(root, encoding='UTF-8', pretty_print=True)

    # should not raise exception because validation turned off by default
    TransferReader().read(broken_doc)

    # should now raise exception with validation turned on
    with self.assertRaises(etree.DocumentInvalid):
        TransferReader(validate=True).read(broken_doc)
def test_roundtrip_put(self):
    """Write a put transfer to XML, read it back, and compare the two.

    After a write/validated-read round trip the target, direction,
    properties and every protocol (uri and endpoint) must match the
    original transfer.
    """
    tran = Transfer(test_target_good, test_dir_put,
                    properties={'LENGTH': '1234'},
                    version=VOSPACE_21)

    xml_str = TransferWriter().write(tran)
    tran2 = TransferReader(validate=True).read(xml_str)

    self.assertEqual(tran.target, tran2.target, 'Wrong target.')
    self.assertEqual(tran.direction, tran2.direction, 'Wrong direction.')
    self.assertEqual(tran.properties, tran2.properties,
                     'Wrong properties.')
    self.assertEqual(len(tran.protocols), len(tran2.protocols),
                     'Wrong number of protocols.')

    # Compare each original protocol against its round-tripped
    # counterpart. The lengths were asserted equal above, so zip()
    # does not silently drop anything.
    for i, (p1, p2) in enumerate(zip(tran.protocols, tran2.protocols)):
        self.assertEqual(p1.uri, p2.uri, 'Wrong uri, protocol %i' % i)
        self.assertEqual(p1.endpoint, p2.endpoint,
                         'Wrong endpoint, protocol %i' % i)
class DataClient(BaseClient):
    """Class for interacting with the data web service"""

    def __init__(self, schema_validate=False, chunk_size=1024, *args,
                 **kwargs):
        """Data service client constructor.

        schema_validate -- validate XML transfer document if True
        chunk_size -- buffer size for get file streaming

        Remaining positional and keyword arguments are forwarded to the
        parent constructor.
        """
        super(DataClient, self).__init__(*args, **kwargs)

        self.transfer_reader = TransferReader(validate=schema_validate)
        self.transfer_writer = TransferWriter()
        self.chunk_size = chunk_size

        # Specific base_url for data webservice, and a separate base URL
        # that will be used for HEAD requests (either */data/auth or
        # */data/pub).
        self.base_url = self.base_url + '/data'
        self.head_base_url = self.base_url
        if self.basic_auth is not None:
            self.base_url = self.base_url + '/auth'
            self.head_base_url = self.head_base_url + '/auth'
        else:
            self.head_base_url = self.head_base_url + '/pub'

        self.base_url = self.base_url + '/transfer'

    def _make_logger(self):
        """ Logger for data client """
        self.logger = logging.getLogger('dataclient')

    def transfer_file(self, localfile, uri=None, filename=None, is_put=False,
                      archive=None, stream=None):
        """ Copy file to/from data/vos web service

        localfile -- file name on disk
        uri       -- URI for remote file
        is_put    -- True for put, False for get.
        stream    -- Optional stream name for data web service transfers

        If uri is not specified it can be generated for data web service
        transfers given an archive and filename:

        filename  -- remote name for file (if unspecified use localfile)
        archive   -- Internally create URI from archive and file name

        Raises ValueError if neither uri nor archive is given,
        UnauthorizedException for a put by an unauthorized client, and
        TransferException if no endpoint could complete the transfer.
        """

        if uri is not None:
            # User provides the uri
            uri_transfer = uri
        elif archive is not None:
            # archive is used to form a data web service uri
            uri_transfer = 'ad:%s/' % archive
            if filename is None:
                # derive filename in archive from localfile
                uri_transfer = uri_transfer + (localfile.split('/'))[-1]
            else:
                # archive filename provided
                uri_transfer = uri_transfer + filename
        else:
            raise ValueError('Must specify either uri or archive')

        # Direction-dependent setup
        if is_put:
            if not self.is_authorized:
                # We actually raise an exception here since the web
                # service will normally respond with a 200 for an
                # anonymous put, though not provide any endpoints.
                raise UnauthorizedException(
                    "Unauthorized clients cannot put files.")
            dir_str = 'to'
            tran = Transfer(uri_transfer, 'pushToVoSpace')
            mode = 'rb'
        else:
            dir_str = 'from'
            tran = Transfer(uri_transfer, 'pullFromVoSpace')
            mode = 'wb'

        # If a stream is supplied it goes in an Http header
        if stream is not None:
            headers = {'X-CADC-Stream': stream}
        else:
            headers = None

        success = False

        # A single handle is used for the whole operation and is always
        # closed on exit, including when negotiation raises.
        with open(localfile, mode) as f:
            self.logger.debug(
                "Using service %s to transfer %s %s %s (%s)" %
                (self.base_url, localfile, dir_str, uri_transfer,
                 str(stream)))

            # obtain list of endpoints by sending a transfer document and
            # looking at the URLs in the returned document
            request_xml = self.transfer_writer.write(tran)
            response = self._upload_xml(self.base_url, request_xml, 'POST',
                                        headers=headers)
            response_str = response.text.encode('utf-8')

            self.logger.debug("POST had %i redirects" %
                              len(response.history))
            self.logger.debug("Response code: %i, URL: %s" %
                              (response.status_code, response.url))
            self.logger.debug("Full XML response:\n%s" % response_str)

            tran = self.transfer_reader.read(response_str)

            # Try transfering to/from endpoint until one works
            for protocol in tran.protocols:
                url = protocol.endpoint
                if url is None:
                    self.logger.debug('No endpoint for URI, skipping.')
                    continue

                self.logger.debug('Transferring %s %s' % (dir_str, url))

                try:
                    if is_put:
                        r = requests.put(url, data=f)
                        self.check_exception(r)
                    else:
                        self.logger.debug(
                            'Get streaming chunk_size is %i' %
                            self.chunk_size)
                        r = requests.get(url, stream=True)
                        # Discard whatever an earlier attempt wrote so
                        # this attempt starts from an empty file.
                        f.seek(0)
                        f.truncate()
                        for chunk in r.iter_content(
                                chunk_size=self.chunk_size):
                            if chunk:
                                f.write(chunk)
                        f.flush()
                        self.check_exception(r)
                    success = True
                    break
                except Exception as e:
                    # Reset to start of file. Try next endpoint
                    f.seek(0)
                    self.logger.warning(
                        'Transfer %s %s %s failed:\n%s' %
                        (str(localfile), str(dir_str), str(uri_transfer),
                         str(e)))

        if not success:
            msg = 'Failed to transfer %s %s %s. ' % (
                str(localfile), str(dir_str), str(uri_transfer))
            msg = msg + 'File missing or user lacks permission?'
            self.logger.error(msg)
            raise TransferException(msg)

        # Do a HEAD to compare md5sums?

    def data_info(self, archive, filename):
        """ Perform a HEAD with data web service for minimal information

        archive  -- archive component of the HEAD URL
        filename -- file name component of the HEAD URL

        Returns whatever self._head_request returns for the URL built
        from head_base_url, archive and filename.
        """
        url = self.head_base_url + '/%s/%s' % (archive, filename)
        self.logger.debug('Performing HEAD request on %s' % (url))
        r = self._head_request(url)

        return r
class DataClient(BaseClient):
    """Class for interacting with the data web service"""

    def __init__(self, schema_validate=False, chunk_size=1024, *args,
                 **kwargs):
        """Data service client constructor.

        schema_validate -- validate XML transfer document if True
        chunk_size -- buffer size for get file streaming

        Remaining positional and keyword arguments are forwarded to the
        parent constructor.
        """
        super(DataClient, self).__init__(*args, **kwargs)

        self.transfer_reader = TransferReader(validate=schema_validate)
        self.transfer_writer = TransferWriter()
        self.chunk_size = chunk_size

        # Specific base_url for data webservice, and a separate base URL
        # that will be used for HEAD requests (either */data/auth or
        # */data/pub).
        self.base_url = self.base_url + '/data'
        self.head_base_url = self.base_url
        if self.basic_auth is not None:
            self.base_url = self.base_url + '/auth'
            self.head_base_url = self.head_base_url + '/auth'
        else:
            self.head_base_url = self.head_base_url + '/pub'

        self.base_url = self.base_url + '/transfer'

    def _make_logger(self):
        """ Logger for data client """
        self.logger = logging.getLogger('dataclient')

    def transfer_file(self, localfile, uri=None, filename=None, is_put=False,
                      archive=None, stream=None):
        """ Copy file to/from data/vos web service

        localfile -- file name on disk
        uri       -- URI for remote file
        is_put    -- True for put, False for get.
        stream    -- Optional stream name for data web service transfers

        If uri is not specified it can be generated for data web service
        transfers given an archive and filename:

        filename  -- remote name for file (if unspecified use localfile)
        archive   -- Internally create URI from archive and file name

        Raises ValueError if neither uri nor archive is given,
        UnauthorizedException for a put by an unauthorized client, and
        TransferException if no endpoint could complete the transfer.
        """

        if uri is not None:
            # User provides the uri
            uri_transfer = uri
        elif archive is not None:
            # archive is used to form a data web service uri
            uri_transfer = 'ad:%s/' % archive
            if filename is None:
                # derive filename in archive from localfile
                uri_transfer = uri_transfer + (localfile.split('/'))[-1]
            else:
                # archive filename provided
                uri_transfer = uri_transfer + filename
        else:
            raise ValueError('Must specify either uri or archive')

        # Direction-dependent setup
        if is_put:
            if not self.is_authorized:
                # We actually raise an exception here since the web
                # service will normally respond with a 200 for an
                # anonymous put, though not provide any endpoints.
                raise UnauthorizedException(
                    "Unauthorized clients cannot put files.")
            dir_str = 'to'
            tran = Transfer(uri_transfer, 'pushToVoSpace')
            mode = 'rb'
        else:
            dir_str = 'from'
            tran = Transfer(uri_transfer, 'pullFromVoSpace')
            mode = 'wb'

        # If a stream is supplied it goes in an Http header
        if stream is not None:
            headers = {'X-CADC-Stream': stream}
        else:
            headers = None

        success = False

        # A single handle is used for the whole operation and is always
        # closed on exit, including when negotiation raises.
        with open(localfile, mode) as f:
            self.logger.debug(
                "Using service %s to transfer %s %s %s (%s)" %
                (self.base_url, localfile, dir_str, uri_transfer,
                 str(stream)))

            # obtain list of endpoints by sending a transfer document and
            # looking at the URLs in the returned document
            request_xml = self.transfer_writer.write(tran)
            response = self._upload_xml(self.base_url, request_xml, 'POST',
                                        headers=headers)
            response_str = response.text.encode('utf-8')

            self.logger.debug("POST had %i redirects" %
                              len(response.history))
            self.logger.debug("Response code: %i, URL: %s" %
                              (response.status_code, response.url))
            self.logger.debug("Full XML response:\n%s" % response_str)

            tran = self.transfer_reader.read(response_str)

            # Try transfering to/from endpoint until one works
            for protocol in tran.protocols:
                url = protocol.endpoint
                if url is None:
                    self.logger.debug('No endpoint for URI, skipping.')
                    continue

                self.logger.debug('Transferring %s %s' % (dir_str, url))

                try:
                    if is_put:
                        r = requests.put(url, data=f)
                        self.check_exception(r)
                    else:
                        self.logger.debug(
                            'Get streaming chunk_size is %i' %
                            self.chunk_size)
                        r = requests.get(url, stream=True)
                        # Discard whatever an earlier attempt wrote so
                        # this attempt starts from an empty file.
                        f.seek(0)
                        f.truncate()
                        for chunk in r.iter_content(
                                chunk_size=self.chunk_size):
                            if chunk:
                                f.write(chunk)
                        f.flush()
                        self.check_exception(r)
                    success = True
                    break
                except Exception as e:
                    # Reset to start of file. Try next endpoint
                    f.seek(0)
                    self.logger.warning(
                        'Transfer %s %s %s failed:\n%s' %
                        (str(localfile), str(dir_str), str(uri_transfer),
                         str(e)))

        if not success:
            msg = 'Failed to transfer %s %s %s. ' % (
                str(localfile), str(dir_str), str(uri_transfer))
            msg = msg + 'File missing or user lacks permission?'
            self.logger.error(msg)
            raise TransferException(msg)

        # Do a HEAD to compare md5sums?

    def data_info(self, archive, filename):
        """ Perform a HEAD with data web service for minimal information

        archive  -- archive component of the HEAD URL
        filename -- file name component of the HEAD URL

        Returns whatever self._head_request returns for the URL built
        from head_base_url, archive and filename.
        """
        url = self.head_base_url + '/%s/%s' % (archive, filename)
        self.logger.debug('Performing HEAD request on %s' % (url))
        r = self._head_request(url)

        return r