def test_17_advanced_replace_file_content(self):
    """Replace the file content of a package deposit and expect an empty 204.

    A SimpleZip package is deposited into the first collection of the first
    workspace, the deposit receipt is fetched explicitly, and the content is
    then replaced with ``metadata_relevant=True``.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      on_behalf_of=SSS_OBO, http_impl=http)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]

    # The package is a zip archive: read it as bytes so the payload is not
    # altered by text-mode newline/encoding handling.
    with open(PACKAGE, "rb") as pkg:
        receipt = conn.create(
            col_iri=col.href,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="example.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    receipt = conn.get_deposit_receipt(receipt.location)

    # now do the replace
    with open(PACKAGE, "rb") as pkg:
        new_receipt = conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip',
            metadata_relevant=True)

    # A successful replace answers 204 with no body to parse.
    assert new_receipt.code == 204
    assert new_receipt.dom is None
def __init__(self, owner):
    """Disabled constructor: always raises ``NotImplementedError``.

    Everything after the ``raise`` is unreachable. It reads as a worked
    example of a SWORD2 deposit (create a binary resource in progress, then
    attach a metadata entry and finish the deposit) -- presumably retained
    as a reference for a later implementation; confirm before deleting.

    :param owner: only used by the unreachable code, which reads
        ``owner.username`` and ``owner.password``.
    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError

    c = Connection(SD_URI, user_name=owner.username, user_pass=owner.password)
    c.get_service_document()

    # pick the first collection within the first workspace:
    workspace_1_title, workspace_1_collections = c.workspaces[0]
    collection = workspace_1_collections[0]

    # upload "package.zip" to this collection as a new (binary) resource;
    # the archive is opened in binary mode so its bytes are sent unmodified:
    with open("package.zip", "rb") as pkg:
        receipt = c.create(col_iri=collection.href,
                           payload=pkg,
                           mimetype="application/zip",
                           filename="package.zip",
                           packaging='http://purl.org/net/sword/package/Binary',
                           in_progress=True)  # As the deposit isn't yet finished

    # Add a metadata record to this newly created resource (or 'container')
    from sword2 import Entry

    # Entry can be passed keyword parameters to add metadata to the entry
    # (namespace + '_' + tagname)
    e = Entry(id="atomid",
              title="atom-title",
              dcterms_abstract="Info about the resource....",
              )

    # to add a new namespace:
    e.register_namespace('skos', 'http://www.w3.org/2004/02/skos/core#')
    e.add_field("skos_Concept", "...")

    # Update the metadata entry to the resource:
    updated_receipt = c.update(metadata_entry=e,
                               dr=receipt,  # use the receipt to discover the right URI to use
                               in_progress=False)  # finish the deposit
def test_19_advanced_replace_metadata(self):
    """Replace the metadata of an entry-only deposit.

    The server may answer the update with either 204 (no body) or 200 (a
    deposit receipt); each case is checked accordingly.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      on_behalf_of=SSS_OBO, http_impl=http)
    conn.get_service_document()
    collection = conn.sd.workspaces[0][1][0]

    original_entry = Entry(
        title="An entry only deposit",
        id="asidjasidj",
        dcterms_abstract="abstract",
        dcterms_identifier="http://whatever/",
    )
    created = conn.create(col_iri=collection.href,
                          metadata_entry=original_entry)

    # The create response is not guaranteed to carry a receipt, so fetch it
    # explicitly from the returned location.
    receipt = conn.get_deposit_receipt(created.location)

    # Perform the metadata replacement.
    replacement_entry = Entry(
        title="A metadata update",
        id="asidjasidj",
        dcterms_abstract="new abstract",
        dcterms_identifier="http://elsewhere/",
    )
    new_receipt = conn.update(dr=receipt, metadata_entry=replacement_entry,
                              in_progress=True)

    assert new_receipt.code == 204 or new_receipt.code == 200

    # 204: empty response, nothing parsed.
    if new_receipt.code == 204:
        assert new_receipt.dom is None

    # 200: a receipt came back and must have parsed and validated.
    if new_receipt.code == 200:
        assert new_receipt.parsed == True
        assert new_receipt.valid == True
def test_36_check_md5(self):
    """Check that a caller-supplied md5sum is reported in the ORE statement.

    An entry-only deposit is created, its content is replaced with a zip
    package while passing a known (deliberately wrong) md5, and the statement
    is then searched for exactly one ``hasMD5`` element carrying that value.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so it is read in binary mode.
    with open(PACKAGE, "rb") as pkg:
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip',
            md5sum="123456789")  # pass in a known md5 (even though it is wrong)

    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

    # need to try and extract the md5 from the dom
    md5_path = ("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description/"
                "{http://vocab.ox.ac.uk/dataset/schema#}hasMD5")
    count = 0
    for element in statement.dom.findall(md5_path):
        count += 1
        assert element.text.strip() == "123456789"

    # Exactly one md5 element is expected (zero would mean it was dropped).
    assert count == 1
def test_10_Multipart_POST_then_update_on_EM_IRI(self):
    """Create a multipart deposit, then replace its content via the EM-IRI.

    The deposit is created in progress with both a payload and a metadata
    entry; the returned ``edit_media`` IRI is then used to replace the
    content, and an empty 204 response is expected.
    """
    conn = Connection("http://localhost:%s/sd-uri" % PORT_NUMBER,
                      user_name="sword", user_pass="******",
                      download_service_document=True)
    e = Entry(title="Foo", id="asidjasidj",
              dcterms_appendix="blah blah", dcterms_title="foo bar")
    deposit_receipt = conn.create(
        payload="Multipart_POST_then_update_on_EM_IRI",
        metadata_entry=e,
        mimetype="text/plain",
        filename="readme.txt",
        packaging='http://purl.org/net/sword/package/Binary',
        col_iri=conn.workspaces[0][1][0].href,
        in_progress=True)

    # Identity comparison is the idiomatic (and override-proof) None check.
    assert deposit_receipt.edit_media is not None

    dr = conn.update(
        payload="Multipart_POST_then_update_on_EM_IRI -- updated resource",
        mimetype="text/plain",
        filename="readthis.txt",
        packaging="http://purl.org/net/sword/package/Binary",
        edit_media_iri=deposit_receipt.edit_media)

    assert dr.code == 204  # empty response
def test_17_advanced_replace_file_content(self):
    """Replace the content of an entry-only deposit and expect an empty 204.

    An entry-only deposit is created on behalf of ``SSS_OBO``, its receipt is
    fetched explicitly, and the content is then replaced with a SimpleZip
    package using ``metadata_relevant=True``.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      on_behalf_of=SSS_OBO)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # ensure that we have a receipt (the server may not give us one
    # by default)
    receipt = conn.get_deposit_receipt(receipt.location)

    # now do the replace; the zip package is read as bytes, not text
    with open(PACKAGE, "rb") as pkg:
        new_receipt = conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip',
            metadata_relevant=True)

    # A successful replace answers 204 with no body to parse.
    assert new_receipt.code == 204
    assert new_receipt.dom is None
def test_01_massive_file(self):
    """Upload ``PACKAGE`` as binary content through the urllib2 HTTP layer.

    An entry-only deposit is created first; its content is then replaced
    with the package file and a 204 response is expected.
    """
    http = UrlLib2Layer()
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      http_impl=http)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="scalability testing", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)
    receipt = conn.get_deposit_receipt(receipt.location)

    # now do the replace; open in binary mode so the archive bytes are sent
    # exactly as stored on disk
    with open(PACKAGE, "rb") as pkg:
        new_receipt = conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="massive_file.zip",
            packaging='http://purl.org/net/sword/package/Binary')

    assert new_receipt.code == 204
def complete_submission(self, edit_uri):
    """Fetch the deposit receipt for ``edit_uri`` and upload the zip package.

    The receipt is requested once and then retried up to
    ``self.retry_limit`` more times, sleeping ``self.retry_delay`` seconds
    before each retry, for as long as the server is unreachable or answers
    with a status of 400 or above. The test then asserts that a 200 receipt
    was obtained, uploads ``self.zipFileName`` against it and asserts a 204
    response.

    :param edit_uri: IRI from which the deposit receipt is retrieved.
    """
    opener = self.get_opener()
    conn = Connection(self.sword2_sd_url,
                      error_response_raises_exceptions=False,
                      http_impl=UrlLib2Layer(opener))

    receipt = None
    try:
        receipt = conn.get_deposit_receipt(edit_uri)
    except urllib2.URLError:
        # The sword2 client does not catch network errors like this one,
        # which indicates that the url couldn't be reached at all
        # don't do anything about it here - we'll try again in a moment
        # and then error out appropriately later
        pass

    # at this stage we need to ensure that we actually got back a deposit
    # receipt
    attempts = 0
    while (receipt is None or receipt.code >= 400) and attempts < self.retry_limit:
        err = "<unable to reach server>" if receipt is None else str(receipt.code)
        logger.debug(
            "Attempt to retrieve Entry Document failed with error %s"
            " ... trying again in %s seconds", err, self.retry_delay)
        attempts += 1
        time.sleep(self.retry_delay)
        try:
            receipt = conn.get_deposit_receipt(edit_uri)
        except urllib2.URLError:
            # The sword2 client does not catch network errors like this one,
            # which indicates that the url couldn't be reached at all
            # just try again up to the retry_limit (the previous receipt,
            # if any, is left in place)
            continue

    self.assertIsNotNone(receipt)
    self.assertEqual(receipt.code, 200)

    # if we get to here we can go ahead with the deposit for real
    with open(self.zipFileName, "rb") as data:
        new_receipt = conn.update(
            dr=receipt,
            payload=data,
            mimetype="application/zip",
            filename=self.dataset_identifier + ".zip",
            packaging='http://dataflow.ox.ac.uk/package/DataBankBagIt')

    self.assertIsNotNone(new_receipt)
    self.assertEqual(new_receipt.code, 204)
    return
def test_35_check_new_zip_state(self):
    """Check the statement state after adding a zip to an entry-only deposit.

    After the package upload the ORE statement is expected to list exactly
    one state, ``http://databank.ox.ac.uk/state/ZipFileAdded``.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so it is read in binary mode.
    with open(PACKAGE, "rb") as pkg:
        conn.update(dr=receipt,
                    payload=pkg,
                    mimetype=PACKAGE_MIME,
                    filename="update.zip",
                    packaging='http://purl.org/net/sword/package/SimpleZip')

    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

    assert len(statement.states) == 1
    # The first item of each state is compared against the state URI.
    assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"
def test_21_advanced_replace_with_multipart(self):
    """Replace both metadata and content of a multipart deposit.

    A deposit carrying an entry plus a SimpleZip package is created, its
    receipt is fetched explicitly, and both parts are then replaced in one
    update. The server may answer 204 (no body) or 200 (a deposit receipt);
    each case is checked accordingly.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      on_behalf_of=SSS_OBO, http_impl=http)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="Multipart deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")

    # The package is a zip archive, so it is read in binary mode.
    with open(PACKAGE, "rb") as pkg:
        receipt = conn.create(
            col_iri=col.href,
            metadata_entry=e,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="example.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    receipt = conn.get_deposit_receipt(receipt.location)

    # now do the replace
    ne = Entry(title="A multipart update", id="asidjasidj",
               dcterms_abstract="new abstract",
               dcterms_identifier="http://elsewhere/")
    with open(PACKAGE, "rb") as pkg:
        new_receipt = conn.update(
            dr=receipt,
            metadata_entry=ne,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip',
            in_progress=True)

    assert new_receipt.code == 204 or new_receipt.code == 200
    if new_receipt.code == 204:
        # empty response: nothing was parsed
        assert new_receipt.dom is None
    if new_receipt.code == 200:
        # a receipt came back and must have parsed and validated
        assert new_receipt.parsed == True
        assert new_receipt.valid == True
def complete_submission(self, dataset, opener, dataset_submission, filename, retry_limit=3, retry_delay=2):
    """Deposit the packaged dataset against an existing SWORD2 container.

    The service document is retrieved and checked for workspaces, the
    deposit receipt at ``dataset_submission.remote_url`` is fetched (with
    retries), and the file at ``filename`` is then uploaded as the content.

    :param dataset: supplies ``identifier``, used to name the uploaded zip.
    :param opener: passed to ``UrlLib2Layer`` to build the HTTP layer.
    :param dataset_submission: supplies ``repository.sword2_sd_url`` and
        ``remote_url``; also handed to ``self._set_error`` on failure.
    :param filename: path of the zip file to upload.
    :param retry_limit: number of additional receipt fetches after the
        first attempt.
    :param retry_delay: seconds to sleep before each retry.
    :returns: whatever ``self._set_error`` returns on failure; ``None``
        when the deposit goes through.
    :raises SwordServiceError: if no service document was retrieved or it
        defines no workspaces.
    """
    logger.debug("Carrying out complete submission")

    # Build the connection from the repository's service document URL.
    sd_url = dataset_submission.repository.sword2_sd_url
    http_layer = UrlLib2Layer(opener)
    conn = Connection(sd_url, error_response_raises_exceptions=False, http_impl=http_layer)
    conn.get_service_document()

    # At least one workspace is required.
    if conn.sd is None:
        raise SwordServiceError("did not successfully retrieve a service document")
    if conn.sd.workspaces is None or len(conn.sd.workspaces) == 0:
        raise SwordServiceError("no workspaces defined in service document")

    edit_uri = dataset_submission.remote_url

    def fetch_receipt(previous):
        # The sword2 client lets urllib2.URLError through when the URL cannot
        # be reached at all; in that case keep whatever we had before.
        try:
            return conn.get_deposit_receipt(edit_uri)
        except urllib2.URLError:
            return previous

    def fetch_failed(candidate):
        return candidate is None or candidate.code >= 400

    # First attempt; failures are dealt with by the retry loop below.
    receipt = fetch_receipt(None)

    retries = 0
    while fetch_failed(receipt) and retries < retry_limit:
        if receipt is None:
            err = "<unable to reach server>"
        else:
            err = str(receipt.code)
        message = ("Attempt to retrieve Entry Document failed with error " + str(err)
                   + " ... trying again in " + str(retry_delay) + " seconds")
        logger.debug(message)
        retries += 1
        time.sleep(retry_delay)
        receipt = fetch_receipt(receipt)

    # Still no usable receipt after every attempt: record the error and stop.
    if fetch_failed(receipt):
        return self._set_error(
            dataset_submission, "error",
            "Attempt to retrieve Entry Document failed " + str(retry_limit + 1) + " times ... giving up")

    logger.debug("Entry Document retrieved ... continuing to full package deposit")

    # Receipt in hand: carry out the real deposit.
    try:
        with open(filename, "rb") as data:
            new_receipt = conn.update(
                dr=receipt,
                payload=data,
                metadata_relevant=True,
                mimetype="application/zip",
                filename=dataset.identifier + ".zip",
                packaging='http://dataflow.ox.ac.uk/package/DataBankBagIt')
            if new_receipt.code >= 400:
                return self._set_error(dataset_submission, "error", "Attempt to deposit content failed")
    except urllib2.URLError:
        # Network-level failure during the upload itself.
        return self._set_error(dataset_submission, "error", "Attempt to deposit content failed")
def test_33_get_ore_statement(self):
    """Retrieve the ORE statement of a deposit and verify its contents.

    An entry-only deposit is created and a zip package is added to it. The
    statement is then checked for: the single original deposit, the single
    aggregated resource (same URI), the single ``ZipFileAdded`` state, and
    the three metadata values (title, abstract, identifier) of the entry.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so it is read in binary mode.
    with open(PACKAGE, "rb") as pkg:
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    receipt = conn.get_deposit_receipt(receipt.location)
    assert receipt.ore_statement_iri is not None

    # get the statement
    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)
    assert isinstance(statement, Ore_Sword_Statement)

    # some specific things that we can assert about the Statement
    # 1 - it should have the original deposits listed
    # 2 - it should have the aggregated resources listed
    # 3 - it should have the correct state
    # 4 - the dom should contain all the relevant metadata

    # check the original deposits
    od_uri = None
    assert len(statement.original_deposits) == 1
    for od in statement.original_deposits:
        assert "update.zip" in od.uri
        assert od.is_original_deposit
        assert od.deposited_on is not None
        # assert od.deposited_by == SSS_UN # FIXME: this may not work until we get auth sorted out
        assert od.deposited_on_behalf_of is None
        od_uri = od.uri

    # check the aggregated resources
    assert len(statement.resources) == 1
    for ar in statement.resources:
        # should be the same resource
        assert od_uri == ar.uri

    # check the states
    assert len(statement.states) == 1
    assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"

    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and is also valid Python 3.
    print(etree.tostring(statement.dom, pretty_print=True))

    # check the metadata
    md_count = 0
    for description in statement.dom.findall(RDF + "Description"):
        # Iterating an element yields its children directly.
        for element in description:
            if element.tag == DC + "title":
                assert element.text.strip() == "An entry only deposit"
                md_count += 1
            elif element.tag == DC + "abstract":
                assert element.text.strip() == "abstract"
                md_count += 1
            elif element.tag == DC + "identifier":
                resource = element.attrib.get(RDF + "resource", None)
                # because we know that there is going to be more than one
                # identifier, only those carrying an rdf:resource are checked
                if resource is not None:
                    assert resource == "http://whatever/"
                    md_count += 1

    print("Metadata Count: " + str(md_count))
    assert md_count == 3