def test_01_massive_file(self):
    """Create a metadata-only deposit, then replace its content with PACKAGE.

    Runs against the SWORD server at SSS_URL and passes when the update
    call answers with HTTP 204.
    """
    http = UrlLib2Layer()
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      http_impl=http)
    conn.get_service_document()

    # first collection of the first workspace in the service document
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="scalability testing",
              id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)
    receipt = conn.get_deposit_receipt(receipt.location)

    # now do the replace; the payload is uploaded as a binary package, so
    # the file must be opened in binary mode to avoid newline translation
    # (Python 2 on Windows) or text decoding (Python 3)
    with open(PACKAGE, "rb") as pkg:
        new_receipt = conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="massive_file.zip",
            packaging='http://purl.org/net/sword/package/Binary')

    assert new_receipt.code == 204
def preflight_submission(self):
    """Create a metadata-only deposit and return the receipt's location.

    Fetches the service document from ``self.sword2_sd_url``, asserts that
    it lists at least one workspace with at least one collection, and
    creates an entry with a fresh ``testid_<uuid4>`` identifier in the
    first collection.  The create call must answer with HTTP 201.
    """
    opener = self.get_opener()
    conn = Connection(self.sword2_sd_url,
                      error_response_raises_exceptions=False,
                      http_impl=UrlLib2Layer(opener))

    logger.debug("Retrieving the service document")
    conn.get_service_document()
    logger.debug("Retrieved the service document")

    self.assertIsNotNone(conn.sd)
    self.assertIsNotNone(conn.sd.workspaces)
    self.assertNotEqual(len(conn.sd.workspaces), 0)

    workspace = conn.sd.workspaces[0][1]
    # we require there to be at least one collection
    self.assertNotEqual(len(workspace), 0)
    col = workspace[0]

    testid = "testid_" + str(uuid.uuid4())
    # e.g. col.href = http://192.168.2.237/swordv2/silo/test-silo
    logger.debug("col iri = " + str(col.href))

    e = Entry(id=testid, title="test title",
              dcterms_abstract="test description")
    # call form prints the same text under Python 2 and is valid Python 3
    print(str(e))

    receipt = conn.create(col_iri=col.href, metadata_entry=e,
                          suggested_identifier=testid)
    self.assertIsNotNone(receipt)
    self.assertEqual(receipt.code, 201)
    return receipt.location
def complete_submission(self, edit_uri):
    """Fetch the deposit receipt for ``edit_uri`` and upload the zip package.

    The receipt is requested once and then re-requested up to
    ``self.retry_limit`` more times, sleeping ``self.retry_delay`` seconds
    between attempts, while the server is unreachable or answers with a
    code >= 400.  The receipt must finally have code 200; the file at
    ``self.zipFileName`` is then sent with ``conn.update`` and the
    response must have code 204.
    """
    opener = self.get_opener()
    conn = Connection(self.sword2_sd_url,
                      error_response_raises_exceptions=False,
                      http_impl=UrlLib2Layer(opener))

    receipt = None
    try:
        receipt = conn.get_deposit_receipt(edit_uri)
    except urllib2.URLError:
        # The sword2 client does not catch network errors like this one,
        # which indicates that the url couldn't be reached at all.
        # Nothing to do here - the retry loop below tries again and the
        # assertions after it fail if no receipt was ever obtained.
        pass

    # at this stage we need to ensure that we actually got back a deposit
    # receipt
    i = 0
    while (receipt is None or receipt.code >= 400) and i < self.retry_limit:
        if receipt is None:
            err = "<unable to reach server>"
        else:
            err = str(receipt.code)
        logger.debug(
            "Attempt to retrieve Entry Document failed with error "
            + str(err) + " ... trying again in "
            + str(self.retry_delay) + " seconds")
        i += 1
        time.sleep(self.retry_delay)
        try:
            receipt = conn.get_deposit_receipt(edit_uri)
        except urllib2.URLError:
            # unreachable again: keep the previous receipt value and let
            # the loop condition decide whether to retry
            pass

    self.assertIsNotNone(receipt)
    self.assertEqual(receipt.code, 200)

    # if we get to here we can go ahead with the deposit for real
    with open(self.zipFileName, "rb") as data:
        new_receipt = conn.update(
            dr=receipt,
            payload=data,
            mimetype="application/zip",
            filename=self.dataset_identifier + ".zip",
            packaging='http://dataflow.ox.ac.uk/package/DataBankBagIt')

    self.assertIsNotNone(new_receipt)
    self.assertEqual(new_receipt.code, 204)
def testServiceDocumentAccess(self):
    """Fetch the service document ten times, each over a new Connection.

    Every iteration asserts that a service document was retrieved and that
    it lists at least one workspace; the failure messages carry the loop
    index so the failing iteration can be identified.
    """
    opener = self.get_opener()
    for i in range(10):
        conn = Connection(self.sword2_sd_url,
                          error_response_raises_exceptions=False,
                          http_impl=UrlLib2Layer(opener))
        conn.get_service_document()
        self.assertIsNotNone(
            conn.sd,
            "Service document None (loop %d)" % (i))
        self.assertIsNotNone(
            conn.sd.workspaces,
            "Service document workspace None (loop %d)" % (i))
        self.assertNotEqual(
            len(conn.sd.workspaces), 0,
            "Service document workspace count %d (loop %d)"
            % (len(conn.sd.workspaces), i))
def _check_dataset(self, dataset_submission):
    """Read the remote statement for a submission and record its state.

    Retrieves the deposit receipt at ``dataset_submission.remote_url`` and
    the ORE statement it points to.  The first state URI found in
    ERROR_STATES sets the submission status to 'error'.  The submission's
    ``last_accessed`` is then updated and the record saved.

    A ``urllib2.URLError`` triggers another attempt after
    ``SwordStatementCheckThread.retry_delay`` seconds, for at most
    ``SwordStatementCheckThread.retry_count`` attempts; once those are
    used up the last error caught is raised.
    """
    failures = 0
    last_error = None

    while failures < SwordStatementCheckThread.retry_count:
        try:
            url_opener = openers.get_opener(
                dataset_submission.repository,
                dataset_submission.submitting_user)
            connection = Connection(
                error_response_raises_exceptions=False,
                http_impl=UrlLib2Layer(url_opener))
            deposit_receipt = connection.get_deposit_receipt(
                dataset_submission.remote_url)
            sword_statement = connection.get_ore_sword_statement(
                deposit_receipt.ore_statement_iri)

            # only the state URI is inspected; the description is ignored
            for uri, _description in sword_statement.states:
                logger.info("Dataset has state URI: " + uri)
                if uri not in ERROR_STATES:
                    continue
                dataset_submission.status = 'error'
                logger.info(
                    "URI: " + uri
                    + " is an error state ... setting 'error' state on submission record")
                break

            dataset_submission.last_accessed = datetime.datetime.now()
            dataset_submission.save()
            time.sleep(SwordStatementCheckThread.throttle)
            # no exception on this attempt, so we're done
            return
        except urllib2.URLError as url_error:
            # connection trouble: note it, wait, and go round again
            logger.info(
                "Got error connecting to the server ... retrying "
                + str(failures + 1) + " of "
                + str(SwordStatementCheckThread.retry_count))
            failures += 1
            last_error = url_error
            time.sleep(SwordStatementCheckThread.retry_delay)

    # every attempt failed: surface the last error that was caught
    raise last_error
def preflight_submission(self, dataset, opener, repository, silo):
    """Create a metadata-only deposit for ``dataset`` in the given silo.

    Verifies that the repository's service document can be retrieved and
    lists at least one workspace containing at least one collection, then
    creates an entry built from the dataset's identifier, title and
    description at ``<repository.homepage>/swordv2/silo/<silo>``.

    Returns the tuple ``(receipt.alternate, receipt.location)``.

    Raises SwordServiceError when the service-document URL is missing or
    the service document has no usable workspace/collection,
    SwordSlugRejected when the server reports a DatasetConflict error, and
    SwordDepositError for any other response with code >= 400.
    """
    logger.debug("Carrying out pre-flight submission")

    # verify that we can get a service document, and that there
    # is at least one silo and that we can authenticate
    if repository.sword2_sd_url is None:
        raise SwordServiceError(
            "No sword2 service-document URL for repository configuration")

    # get the service document (for which we must be authenticated)
    conn = Connection(repository.sword2_sd_url,
                      error_response_raises_exceptions=False,
                      http_impl=UrlLib2Layer(opener))
    conn.get_service_document()

    # we require there to be at least one workspace
    if conn.sd is None:
        raise SwordServiceError(
            "did not successfully retrieve a service document")
    if conn.sd.workspaces is None or len(conn.sd.workspaces) == 0:
        raise SwordServiceError("no workspaces defined in service document")

    # we require there to be at least one collection
    workspace = conn.sd.workspaces[0][1]
    if len(workspace) == 0:
        raise SwordServiceError("no collections defined in workspace")

    # The deposit target is derived from the silo name rather than from a
    # collection in the service document.  Normalise the trailing slash so
    # the URL is well formed whether or not the homepage ends with one.
    silohref = repository.homepage.rstrip("/") + "/swordv2/silo/" + silo

    # assemble the entry ready for deposit, using the basic metadata
    # FIXME: is there anything further we need to do about the metadata here?
    e = Entry(id=dataset.identifier, title=dataset.title,
              dcterms_abstract=dataset.description)

    # create the item using the metadata-only approach (errors are
    # suppressed by the connection; we check for them below)
    logger.debug("Deposit is being created")
    receipt = conn.create(col_iri=silohref, metadata_entry=e,
                          suggested_identifier=dataset.identifier)
    logger.debug("Deposit created")

    # check for errors
    if receipt.code >= 400:
        logger.debug("Received error message from server: " + receipt.to_xml())
        if receipt.error_href == "http://databank.ox.ac.uk/errors/DatasetConflict":
            raise SwordSlugRejected()
        raise SwordDepositError(receipt)

    # lazy %-formatting so a missing location cannot break a log call
    logger.debug("Deposit carried out to: %s", receipt.location)
    return (receipt.alternate, receipt.location)
def get_silos(self, opener, repository):
    """Return the collections of the first workspace in the service document.

    Raises SwordServiceError when the repository has no service-document
    URL, when no service document is retrieved, or when it lists no
    workspaces or the first workspace has no collections.  Each failure is
    also written to the debug log before the error is raised.
    """
    def reject(reason):
        # log the reason, then raise it as a service error
        logger.debug(reason)
        raise SwordServiceError(reason)

    logger.debug("Carrying out get_silos for a repository selected")

    # a service-document URL is mandatory
    if repository.sword2_sd_url is None:
        logger.debug("No sword2 service-document URL for repository configuration : ")
        logger.debug(repr(repository.sword2_sd_url))
        raise SwordServiceError(
            "No sword2 service-document URL for repository configuration")

    logger.debug("formulating a connection object for sword repo with url : "
                 + repository.sword2_sd_url)

    # fetching the service document requires authentication via the opener
    connection = Connection(repository.sword2_sd_url,
                            error_response_raises_exceptions=False,
                            http_impl=UrlLib2Layer(opener))
    logger.debug("before getting service document : ")
    connection.get_service_document()
    logger.debug("Got the service document")

    # there has to be a service document with at least one workspace
    if connection.sd is None:
        reject("did not successfully retrieve a service document")
    if connection.sd.workspaces is None or len(connection.sd.workspaces) == 0:
        reject("no workspaces defined in service document")

    # ... and that workspace has to hold at least one collection
    collections = connection.sd.workspaces[0][1]
    if len(collections) == 0:
        reject("no collections defined in workspace")

    # FIXME: there is no mechanism yet for choosing between collections,
    # so the caller receives the whole first workspace
    logger.debug("Finished with get_silos for a repository selected")
    return collections
def complete_submission(self, dataset, opener, dataset_submission, filename, retry_limit=3, retry_delay=2):
    """Upload the package at ``filename`` to an existing deposit.

    Checks that the repository's service document lists a workspace, then
    fetches the deposit receipt at ``dataset_submission.remote_url``.  The
    fetch is repeated up to ``retry_limit`` times, ``retry_delay`` seconds
    apart, while the server is unreachable or answers with code >= 400.
    With a good receipt the file is sent via ``conn.update``.

    Returns the result of ``self._set_error`` when the receipt cannot be
    retrieved or the upload fails, otherwise None.  Raises
    SwordServiceError when the service document is unusable.
    """
    logger.debug("Carrying out complete submission")

    # create a connection
    target_repository = dataset_submission.repository
    conn = Connection(target_repository.sword2_sd_url,
                      error_response_raises_exceptions=False,
                      http_impl=UrlLib2Layer(opener))
    conn.get_service_document()

    # we require there to be at least one workspace
    if conn.sd is None:
        raise SwordServiceError("did not successfully retrieve a service document")
    if conn.sd.workspaces is None or len(conn.sd.workspaces) == 0:
        raise SwordServiceError("no workspaces defined in service document")

    edit_uri = dataset_submission.remote_url

    def fetch_receipt(previous):
        # The sword2 client does not catch network errors such as
        # URLError (the url couldn't be reached at all); in that case
        # keep whatever receipt we had before.
        try:
            return conn.get_deposit_receipt(edit_uri)
        except urllib2.URLError:
            return previous

    def unusable(candidate):
        # no receipt at all, or a receipt carrying an error status
        return candidate is None or candidate.code >= 400

    # first attempt, then up to retry_limit further attempts
    receipt = fetch_receipt(None)
    retries_used = 0
    while unusable(receipt) and retries_used < retry_limit:
        reason = "<unable to reach server>" if receipt is None else str(receipt.code)
        logger.debug("Attempt to retrieve Entry Document failed with error "
                     + reason + " ... trying again in "
                     + str(retry_delay) + " seconds")
        retries_used += 1
        time.sleep(retry_delay)
        receipt = fetch_receipt(receipt)

    # still no good receipt: give up and flag the submission
    if unusable(receipt):
        return self._set_error(
            dataset_submission, "error",
            "Attempt to retrieve Entry Document failed "
            + str(retry_limit + 1) + " times ... giving up")

    logger.debug("Entry Document retrieved ... continuing to full package deposit")

    # go ahead with the deposit for real
    try:
        with open(filename, "rb") as package:
            new_receipt = conn.update(
                dr=receipt,
                payload=package,
                metadata_relevant=True,
                mimetype="application/zip",
                filename=dataset.identifier + ".zip",
                packaging='http://dataflow.ox.ac.uk/package/DataBankBagIt')
        if new_receipt.code >= 400:
            return self._set_error(dataset_submission, "error",
                                   "Attempt to deposit content failed")
    except urllib2.URLError:
        # network-level failure that the sword2 client does not catch
        return self._set_error(dataset_submission, "error",
                               "Attempt to deposit content failed")
from . import TestController
from sword2 import Connection, Entry, Error_Document, Atom_Sword_Statement, Ore_Sword_Statement, UrlLib2Layer
from sword2.compatible_libs import etree

# Package deposited by the tests, and its MIME type
PACKAGE = "tests/spec/example.zip"
PACKAGE_MIME = "application/zip"

# Service-document URL and credentials of the SWORD server under test
SSS_URL = "http://localhost:8080/sd-uri"
SSS_UN = "sword"
SSS_PW = "sword"
SSS_OBO = "obo"

# HTTP layer shared by the tests in this module
http = UrlLib2Layer()

# Note, there are tests in here which use On-Behalf-Of, so those tests will
# fail if the server does not support mediation.


class TestConnection(TestController):
    """Tests that exercise a sword2 Connection against the server at SSS_URL."""

    def test_01_get_service_document(self):
        """The service document can be fetched, parses, and validates."""
        conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                          http_impl=http)
        conn.get_service_document()

        # given that the client is fully functional, testing that the
        # service document parses and is valid is sufficient. This, obviously,
        # doesn't test the validation routine itself.
        assert conn.sd is not None
        assert conn.sd.parsed
        assert conn.sd.valid