def test_33_get_ore_statement(self):
    """Deposit a metadata entry plus a zip package and fetch its statement.

    The deposit is created on behalf of SSS_OBO.  The deposit receipt is
    then re-fetched from the location the server returned, and the
    statement it points at must come back as an Ore_Sword_Statement.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      on_behalf_of=SSS_OBO, http_impl=http)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="Multipart deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        receipt = conn.create(
            col_iri=col.href,
            metadata_entry=e,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="example.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    edit_iri = receipt.location
    receipt = conn.get_deposit_receipt(edit_iri)
    assert receipt.ore_statement_iri is not None

    # get the statement
    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)
    assert isinstance(statement, Ore_Sword_Statement)
def test_36_check_md5(self):
    """Check that an md5sum supplied on update is echoed in the statement.

    Creates a metadata-only deposit, updates it with the zip package while
    passing an explicit md5sum, then looks for exactly one hasMD5 element
    in the statement DOM carrying that same value.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        # The receipt returned by the update is not needed; the statement
        # is looked up through the original receipt below.
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip',
            md5sum="123456789")  # pass in a known md5 (even though it is wrong)

    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

    # need to try and extract the md5 from the dom
    md5_values = [
        element.text.strip()
        for element in statement.dom.findall(
            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description/{http://vocab.ox.ac.uk/dataset/schema#}hasMD5")
    ]
    # exactly one hasMD5 element, and it carries the value we sent
    assert md5_values == ["123456789"]
def test_34_check_metadata_only_state(self):
    """A metadata-only deposit must report the single EmptyContainer state."""
    expected_state = "http://databank.ox.ac.uk/state/EmptyContainer"

    connection = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    connection.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    collection = connection.sd.workspaces[0][1][0]

    entry = Entry(
        title="An entry only deposit",
        id="asidjasidj",
        dcterms_abstract="abstract",
        dcterms_identifier="http://whatever/",
    )
    deposit_receipt = connection.create(col_iri=collection.href,
                                        metadata_entry=entry)

    sword_statement = connection.get_ore_sword_statement(
        deposit_receipt.ore_statement_iri)

    # exactly one state, whose first item is the state URI
    states = sword_statement.states
    assert len(states) == 1
    assert states[0][0] == expected_state
def test_34_check_metadata_only_state(self):
    """A metadata-only deposit must report the single EmptyContainer state."""
    expected_state = "http://databank.ox.ac.uk/state/EmptyContainer"

    connection = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    connection.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    collection = connection.sd.workspaces[0][1][0]

    entry = Entry(
        title="An entry only deposit",
        id="asidjasidj",
        dcterms_abstract="abstract",
        dcterms_identifier="http://whatever/",
    )
    deposit_receipt = connection.create(col_iri=collection.href,
                                        metadata_entry=entry)

    sword_statement = connection.get_ore_sword_statement(
        deposit_receipt.ore_statement_iri)

    # exactly one state, whose first item is the state URI
    states = sword_statement.states
    assert len(states) == 1
    assert states[0][0] == expected_state
def test_35_check_new_zip_state(self):
    """Updating a deposit with a zip package must yield the ZipFileAdded state.

    Creates a metadata-only deposit, replaces its content with the zip
    package, then checks that the statement reports exactly one state and
    that it is the ZipFileAdded state URI.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        # The receipt returned by the update is not needed; the statement
        # is looked up through the original receipt below.
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

    assert len(statement.states) == 1
    assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"
def test_35_check_new_zip_state(self):
    """Updating a deposit with a zip package must yield the ZipFileAdded state.

    Creates a metadata-only deposit, replaces its content with the zip
    package, then checks that the statement reports exactly one state and
    that it is the ZipFileAdded state URI.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        # The receipt returned by the update is not needed; the statement
        # is looked up through the original receipt below.
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

    assert len(statement.states) == 1
    assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"
def _check_dataset(self, dataset_submission):
    """Check the remote state of one submitted dataset and record the result.

    Fetches the deposit receipt at ``dataset_submission.remote_url`` and the
    statement it references.  If any state URI in the statement is listed
    in ERROR_STATES, the submission's ``status`` is set to ``'error'``.
    ``last_accessed`` is then stamped with the current time and the
    submission is saved.

    A ``urllib2.URLError`` causes the whole check to be retried, up to
    ``SwordStatementCheckThread.retry_count`` attempts, sleeping
    ``SwordStatementCheckThread.retry_delay`` after each failure.

    Raises:
        urllib2.URLError: the last error seen, if every attempt failed.

    If ``retry_count`` is zero or negative no attempt is made and the
    method returns without raising.
    """
    retry_counter = 0
    exception = None
    while retry_counter < SwordStatementCheckThread.retry_count:
        try:
            # logger.info("Checking state of dataset at " + dataset_submission.remote_url)
            opener = openers.get_opener(dataset_submission.repository,
                                        dataset_submission.submitting_user)
            conn = Connection(error_response_raises_exceptions=False,
                              http_impl=UrlLib2Layer(opener))
            receipt = conn.get_deposit_receipt(dataset_submission.remote_url)
            statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

            # each state is a (uri, description) pair; only the URI matters
            for state_uri, _state_desc in statement.states:
                logger.info("Dataset has state URI: " + state_uri)
                if state_uri in ERROR_STATES:
                    dataset_submission.status = 'error'
                    logger.info("URI: " + state_uri + " is an error state ... setting 'error' state on submission record")
                    # one error state is enough; stop scanning
                    break

            dataset_submission.last_accessed = datetime.datetime.now()
            dataset_submission.save()
            # pause after a successful check before returning to the caller
            time.sleep(SwordStatementCheckThread.throttle)
        except urllib2.URLError as e:
            # if we get an exception, try again up to the limit
            logger.info("Got error connecting to the server ... retrying " + str(retry_counter + 1) + " of " + str(SwordStatementCheckThread.retry_count))
            retry_counter += 1
            exception = e
            time.sleep(SwordStatementCheckThread.retry_delay)
            continue
        else:
            # if we don't get an exception, we're done
            return

    # if we don't return from the else statement above, it means the retries
    # all failed, and we have a problem.  Raise the last thrown exception.
    # When retry_count <= 0 the loop never ran and there is nothing to raise
    # (``raise None`` would itself be a TypeError).
    if exception is not None:
        raise exception
def test_36_check_md5(self):
    """Check that an md5sum supplied on update is echoed in the statement.

    Creates a metadata-only deposit, updates it with the zip package while
    passing an explicit md5sum, then looks for exactly one hasMD5 element
    in the statement DOM carrying that same value.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        # The receipt returned by the update is not needed; the statement
        # is looked up through the original receipt below.
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip',
            md5sum="123456789")  # pass in a known md5 (even though it is wrong)

    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

    # need to try and extract the md5 from the dom
    md5_values = [
        element.text.strip()
        for element in statement.dom.findall(
            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description/{http://vocab.ox.ac.uk/dataset/schema#}hasMD5")
    ]
    # exactly one hasMD5 element, and it carries the value we sent
    assert md5_values == ["123456789"]
def _check_dataset(self, dataset_submission):
    """Check the remote state of one submitted dataset and record the result.

    Fetches the deposit receipt at ``dataset_submission.remote_url`` and the
    statement it references.  If any state URI in the statement is listed
    in ERROR_STATES, the submission's ``status`` is set to ``'error'``.
    ``last_accessed`` is then stamped with the current time and the
    submission is saved.

    A ``urllib2.URLError`` causes the whole check to be retried, up to
    ``SwordStatementCheckThread.retry_count`` attempts, sleeping
    ``SwordStatementCheckThread.retry_delay`` after each failure.

    Raises:
        urllib2.URLError: the last error seen, if every attempt failed.

    If ``retry_count`` is zero or negative no attempt is made and the
    method returns without raising.
    """
    retry_counter = 0
    exception = None
    while retry_counter < SwordStatementCheckThread.retry_count:
        try:
            # logger.info("Checking state of dataset at " + dataset_submission.remote_url)
            opener = openers.get_opener(dataset_submission.repository,
                                        dataset_submission.submitting_user)
            conn = Connection(error_response_raises_exceptions=False,
                              http_impl=UrlLib2Layer(opener))
            receipt = conn.get_deposit_receipt(dataset_submission.remote_url)
            statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

            # each state is a (uri, description) pair; only the URI matters
            for state_uri, _state_desc in statement.states:
                logger.info("Dataset has state URI: " + state_uri)
                if state_uri in ERROR_STATES:
                    dataset_submission.status = 'error'
                    logger.info("URI: " + state_uri + " is an error state ... setting 'error' state on submission record")
                    # one error state is enough; stop scanning
                    break

            dataset_submission.last_accessed = datetime.datetime.now()
            dataset_submission.save()
            # pause after a successful check before returning to the caller
            time.sleep(SwordStatementCheckThread.throttle)
        except urllib2.URLError as e:
            # if we get an exception, try again up to the limit
            logger.info("Got error connecting to the server ... retrying " + str(retry_counter + 1) + " of " + str(SwordStatementCheckThread.retry_count))
            retry_counter += 1
            exception = e
            time.sleep(SwordStatementCheckThread.retry_delay)
            continue
        else:
            # if we don't get an exception, we're done
            return

    # if we don't return from the else statement above, it means the retries
    # all failed, and we have a problem.  Raise the last thrown exception.
    # When retry_count <= 0 the loop never ran and there is nothing to raise
    # (``raise None`` would itself be a TypeError).
    if exception is not None:
        raise exception
def test_33_get_ore_statement(self):
    """Deposit a zip package on its own and fetch the resulting statement.

    The deposit is created on behalf of SSS_OBO and carries only the
    binary payload (no metadata entry is sent).  The deposit receipt is
    then re-fetched from the location the server returned, and the
    statement it points at must come back as an Ore_Sword_Statement.
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW,
                      on_behalf_of=SSS_OBO)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        receipt = conn.create(
            col_iri=col.href,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="example.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    edit_iri = receipt.location
    receipt = conn.get_deposit_receipt(edit_iri)
    assert receipt.ore_statement_iri is not None

    # get the statement
    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)
    assert isinstance(statement, Ore_Sword_Statement)
def test_33_get_ore_statement(self):
    """Fetch the statement for an updated deposit and check its contents.

    Creates a metadata-only deposit, replaces its content with the zip
    package, re-fetches the deposit receipt, and then verifies on the
    statement:

    1. the original deposits listed
    2. the aggregated resources listed
    3. the state
    4. the metadata carried in the DOM
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        # The receipt returned by the update is not needed; a fresh one is
        # fetched from the original receipt's location below.
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    receipt = conn.get_deposit_receipt(receipt.location)
    assert receipt.ore_statement_iri is not None

    # get the statement
    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)
    assert isinstance(statement, Ore_Sword_Statement)

    # check the original deposits
    od_uri = None
    assert len(statement.original_deposits) == 1
    for od in statement.original_deposits:
        assert "update.zip" in od.uri
        assert od.is_original_deposit
        assert od.deposited_on is not None
        # assert od.deposited_by == SSS_UN # FIXME: this may not work until we get auth sorted out
        assert od.deposited_on_behalf_of is None
        od_uri = od.uri

    # check the aggregated resources
    assert len(statement.resources) == 1
    for ar in statement.resources:
        # should be the same resource
        assert od_uri == ar.uri

    # check the states
    assert len(statement.states) == 1
    assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"

    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and is also valid Python 3 syntax.
    print(etree.tostring(statement.dom, pretty_print=True))

    # check the metadata
    md_count = 0
    for description in statement.dom.findall(RDF + "Description"):
        # iterate the element directly; getchildren() is deprecated
        for element in description:
            if element.tag == DC + "title":
                assert element.text.strip() == "An entry only deposit"
                md_count += 1
            elif element.tag == DC + "abstract":
                assert element.text.strip() == "abstract"
                md_count += 1
            elif element.tag == DC + "identifier":
                resource = element.attrib.get(RDF + "resource", None)
                # because we know that there is going to be more than one
                # identifier, only those with a resource attribute count
                if resource is not None:
                    assert resource == "http://whatever/"
                    md_count += 1

    print("Metadata Count: " + str(md_count))
    assert md_count == 3
def test_33_get_ore_statement(self):
    """Fetch the statement for an updated deposit and check its contents.

    Creates a metadata-only deposit, replaces its content with the zip
    package, re-fetches the deposit receipt, and then verifies on the
    statement:

    1. the original deposits listed
    2. the aggregated resources listed
    3. the state
    4. the metadata carried in the DOM
    """
    conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
    conn.get_service_document()
    # presumably the first collection of the first workspace -- confirm
    # against the service document structure
    col = conn.sd.workspaces[0][1][0]
    e = Entry(title="An entry only deposit", id="asidjasidj",
              dcterms_abstract="abstract",
              dcterms_identifier="http://whatever/")
    receipt = conn.create(col_iri=col.href, metadata_entry=e)

    # The package is a zip archive, so read it as raw bytes: text mode can
    # translate newlines or attempt to decode the content.
    with open(PACKAGE, "rb") as pkg:
        # The receipt returned by the update is not needed; a fresh one is
        # fetched from the original receipt's location below.
        conn.update(
            dr=receipt,
            payload=pkg,
            mimetype=PACKAGE_MIME,
            filename="update.zip",
            packaging='http://purl.org/net/sword/package/SimpleZip')

    # ensure that we have a receipt (the server may not give us one
    # by default)
    receipt = conn.get_deposit_receipt(receipt.location)
    assert receipt.ore_statement_iri is not None

    # get the statement
    statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)
    assert isinstance(statement, Ore_Sword_Statement)

    # check the original deposits
    od_uri = None
    assert len(statement.original_deposits) == 1
    for od in statement.original_deposits:
        assert "update.zip" in od.uri
        assert od.is_original_deposit
        assert od.deposited_on is not None
        # assert od.deposited_by == SSS_UN # FIXME: this may not work until we get auth sorted out
        assert od.deposited_on_behalf_of is None
        od_uri = od.uri

    # check the aggregated resources
    assert len(statement.resources) == 1
    for ar in statement.resources:
        # should be the same resource
        assert od_uri == ar.uri

    # check the states
    assert len(statement.states) == 1
    assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"

    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and is also valid Python 3 syntax.
    print(etree.tostring(statement.dom, pretty_print=True))

    # check the metadata
    md_count = 0
    for description in statement.dom.findall(RDF + "Description"):
        # iterate the element directly; getchildren() is deprecated
        for element in description:
            if element.tag == DC + "title":
                assert element.text.strip() == "An entry only deposit"
                md_count += 1
            elif element.tag == DC + "abstract":
                assert element.text.strip() == "abstract"
                md_count += 1
            elif element.tag == DC + "identifier":
                resource = element.attrib.get(RDF + "resource", None)
                # because we know that there is going to be more than one
                # identifier, only those with a resource attribute count
                if resource is not None:
                    assert resource == "http://whatever/"
                    md_count += 1

    print("Metadata Count: " + str(md_count))
    assert md_count == 3