def test_33_get_ore_statement(self):
        """Deposit an entry plus zip package and fetch its ORE statement.

        The deposit is made on behalf of SSS_OBO.  The deposit receipt is
        re-fetched from its location, and the statement IRI it advertises
        must yield an Ore_Sword_Statement.
        """
        conn = Connection(SSS_URL,
                          user_name=SSS_UN,
                          user_pass=SSS_PW,
                          on_behalf_of=SSS_OBO,
                          http_impl=http)
        conn.get_service_document()
        col = conn.sd.workspaces[0][1][0]
        entry = Entry(title="Multipart deposit",
                      id="asidjasidj",
                      dcterms_abstract="abstract",
                      dcterms_identifier="http://whatever/")

        # The package is a zip archive, so it must be read as bytes: text
        # mode may translate line endings and corrupt the payload.
        with open(PACKAGE, "rb") as pkg:
            receipt = conn.create(
                col_iri=col.href,
                metadata_entry=entry,
                payload=pkg,
                mimetype=PACKAGE_MIME,
                filename="example.zip",
                packaging='http://purl.org/net/sword/package/SimpleZip')

        # ensure that we have a receipt (the server may not give us one
        # by default)
        edit_iri = receipt.location
        receipt = conn.get_deposit_receipt(edit_iri)

        assert receipt.ore_statement_iri is not None

        # get the statement
        statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

        assert isinstance(statement, Ore_Sword_Statement)
    def test_36_check_md5(self):
        """Check that a client-supplied md5 is echoed in the statement.

        An entry-only deposit is created and then updated with a zip
        package, passing a known (deliberately wrong) md5sum.  The
        statement DOM must contain exactly one hasMD5 element carrying
        that value.
        """
        conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
        conn.get_service_document()
        col = conn.sd.workspaces[0][1][0]
        entry = Entry(title="An entry only deposit",
                      id="asidjasidj",
                      dcterms_abstract="abstract",
                      dcterms_identifier="http://whatever/")
        receipt = conn.create(col_iri=col.href, metadata_entry=entry)

        # The package is a zip archive, so it must be read as bytes: text
        # mode may translate line endings and corrupt the payload.
        with open(PACKAGE, "rb") as pkg:
            conn.update(
                dr=receipt,
                payload=pkg,
                mimetype=PACKAGE_MIME,
                filename="update.zip",
                packaging='http://purl.org/net/sword/package/SimpleZip',
                md5sum="123456789"
            )  # pass in a known md5 (even though it is wrong)
        statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

        # need to try and extract the md5 from the dom
        md5_path = (
            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description/"
            "{http://vocab.ox.ac.uk/dataset/schema#}hasMD5"
        )
        count = 0
        for element in statement.dom.findall(md5_path):
            count += 1
            assert element.text.strip() == "123456789"

        assert count == 1
    def test_34_check_metadata_only_state(self):
        """An entry-only deposit must report the single EmptyContainer state."""
        client = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
        client.get_service_document()
        collection = client.sd.workspaces[0][1][0]

        entry = Entry(
            title="An entry only deposit",
            id="asidjasidj",
            dcterms_abstract="abstract",
            dcterms_identifier="http://whatever/",
        )
        deposit_receipt = client.create(col_iri=collection.href,
                                        metadata_entry=entry)

        # The statement is looked up straight from the creation receipt.
        statement = client.get_ore_sword_statement(
            deposit_receipt.ore_statement_iri)

        assert len(statement.states) == 1
        expected_state = "http://databank.ox.ac.uk/state/EmptyContainer"
        assert statement.states[0][0] == expected_state
    def test_34_check_metadata_only_state(self):
        """An entry-only deposit must report the single EmptyContainer state."""
        client = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
        client.get_service_document()
        collection = client.sd.workspaces[0][1][0]

        entry = Entry(
            title="An entry only deposit",
            id="asidjasidj",
            dcterms_abstract="abstract",
            dcterms_identifier="http://whatever/",
        )
        deposit_receipt = client.create(col_iri=collection.href,
                                        metadata_entry=entry)

        # The statement is looked up straight from the creation receipt.
        statement = client.get_ore_sword_statement(
            deposit_receipt.ore_statement_iri)

        assert len(statement.states) == 1
        expected_state = "http://databank.ox.ac.uk/state/EmptyContainer"
        assert statement.states[0][0] == expected_state
 def test_35_check_new_zip_state(self):
     """Updating a deposit with a zip must yield the ZipFileAdded state.

     An entry-only deposit is created, then updated with the zip package;
     the statement must then report exactly one state, ZipFileAdded.
     """
     conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
     conn.get_service_document()
     col = conn.sd.workspaces[0][1][0]
     entry = Entry(title="An entry only deposit",
                   id="asidjasidj",
                   dcterms_abstract="abstract",
                   dcterms_identifier="http://whatever/")
     receipt = conn.create(col_iri=col.href, metadata_entry=entry)

     # The package is a zip archive, so it must be read as bytes: text
     # mode may translate line endings and corrupt the payload.
     with open(PACKAGE, "rb") as pkg:
         conn.update(dr=receipt,
                     payload=pkg,
                     mimetype=PACKAGE_MIME,
                     filename="update.zip",
                     packaging='http://purl.org/net/sword/package/SimpleZip')
     statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

     assert len(statement.states) == 1
     assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"
 def test_35_check_new_zip_state(self):
     """Updating a deposit with a zip must yield the ZipFileAdded state.

     An entry-only deposit is created, then updated with the zip package;
     the statement must then report exactly one state, ZipFileAdded.
     """
     conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
     conn.get_service_document()
     col = conn.sd.workspaces[0][1][0]
     entry = Entry(title="An entry only deposit",
                   id="asidjasidj",
                   dcterms_abstract="abstract",
                   dcterms_identifier="http://whatever/")
     receipt = conn.create(col_iri=col.href, metadata_entry=entry)

     # The package is a zip archive, so it must be read as bytes: text
     # mode may translate line endings and corrupt the payload.
     with open(PACKAGE, "rb") as pkg:
         conn.update(dr=receipt,
                     payload=pkg,
                     mimetype=PACKAGE_MIME,
                     filename="update.zip",
                     packaging='http://purl.org/net/sword/package/SimpleZip')
     statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

     assert len(statement.states) == 1
     assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"
Example #7
0
    def _check_dataset(self, dataset_submission):
        """Check the remote state of one dataset submission.

        Fetches the deposit receipt at ``dataset_submission.remote_url``
        and then the ORE/SWORD statement it points to.  If any state URI in
        the statement is in ERROR_STATES, the submission's status is set to
        'error'.  The submission's ``last_accessed`` is then set to the
        current time, the record is saved, and the call sleeps for
        SwordStatementCheckThread.throttle.

        A urllib2.URLError causes the whole check to be attempted again,
        up to SwordStatementCheckThread.retry_count attempts in total,
        sleeping SwordStatementCheckThread.retry_delay after each failure.

        Raises:
            urllib2.URLError: the last error seen, if every attempt failed.
        """
        retry_counter = 0
        exception = None
        while retry_counter < SwordStatementCheckThread.retry_count:
            try:
                opener = openers.get_opener(dataset_submission.repository,
                                            dataset_submission.submitting_user)
                conn = Connection(error_response_raises_exceptions=False,
                                  http_impl=UrlLib2Layer(opener))
                receipt = conn.get_deposit_receipt(
                    dataset_submission.remote_url)
                statement = conn.get_ore_sword_statement(
                    receipt.ore_statement_iri)
                for state_uri, _state_desc in statement.states:
                    logger.info("Dataset has state URI: " + state_uri)
                    if state_uri in ERROR_STATES:
                        dataset_submission.status = 'error'
                        logger.info(
                            "URI: " + state_uri +
                            " is an error state ... setting 'error' state on submission record"
                        )
                        # one error state is enough to flag the submission
                        break
                dataset_submission.last_accessed = datetime.datetime.now()
                dataset_submission.save()
                time.sleep(SwordStatementCheckThread.throttle)

            except urllib2.URLError as e:
                # if we get an exception, try again up to the limit
                retry_counter += 1
                exception = e
                logger.info(
                    "Got error connecting to the server ... retrying " +
                    str(retry_counter) + " of " +
                    str(SwordStatementCheckThread.retry_count))
                time.sleep(SwordStatementCheckThread.retry_delay)

            else:
                # if we don't get an exception, we're done
                return

        if exception is None:
            # The loop body never ran (retry_count allows no attempts), so
            # there is no error to re-raise; ``raise None`` would only
            # produce a confusing TypeError.
            logger.info("No check attempted: retry_count is " +
                        str(SwordStatementCheckThread.retry_count))
            return

        # if we don't return from the else statement above, it means the retries
        # all failed, and we have a problem.  Raise the last thrown exception.
        raise exception
 def test_36_check_md5(self):
     """Check that a client-supplied md5 is echoed in the statement.

     An entry-only deposit is created and then updated with a zip
     package, passing a known (deliberately wrong) md5sum.  The
     statement DOM must contain exactly one hasMD5 element carrying
     that value.
     """
     conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
     conn.get_service_document()
     col = conn.sd.workspaces[0][1][0]
     entry = Entry(title="An entry only deposit",
                   id="asidjasidj",
                   dcterms_abstract="abstract",
                   dcterms_identifier="http://whatever/")
     receipt = conn.create(col_iri=col.href, metadata_entry=entry)

     # The package is a zip archive, so it must be read as bytes: text
     # mode may translate line endings and corrupt the payload.
     with open(PACKAGE, "rb") as pkg:
         conn.update(dr=receipt,
                     payload=pkg,
                     mimetype=PACKAGE_MIME,
                     filename="update.zip",
                     packaging='http://purl.org/net/sword/package/SimpleZip',
                     md5sum="123456789")  # pass in a known md5 (even though it is wrong)
     statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

     # need to try and extract the md5 from the dom
     md5_path = (
         "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description/"
         "{http://vocab.ox.ac.uk/dataset/schema#}hasMD5"
     )
     count = 0
     for element in statement.dom.findall(md5_path):
         count += 1
         assert element.text.strip() == "123456789"

     assert count == 1
 def _check_dataset(self, dataset_submission):
     """Check the remote state of one dataset submission.

     Fetches the deposit receipt at ``dataset_submission.remote_url``
     and then the ORE/SWORD statement it points to.  If any state URI in
     the statement is in ERROR_STATES, the submission's status is set to
     'error'.  The submission's ``last_accessed`` is then set to the
     current time, the record is saved, and the call sleeps for
     SwordStatementCheckThread.throttle.

     A urllib2.URLError causes the whole check to be attempted again,
     up to SwordStatementCheckThread.retry_count attempts in total,
     sleeping SwordStatementCheckThread.retry_delay after each failure.

     Raises:
         urllib2.URLError: the last error seen, if every attempt failed.
     """
     retry_counter = 0
     exception = None
     while retry_counter < SwordStatementCheckThread.retry_count:
         try:
             opener = openers.get_opener(dataset_submission.repository,
                                         dataset_submission.submitting_user)
             conn = Connection(error_response_raises_exceptions=False,
                               http_impl=UrlLib2Layer(opener))
             receipt = conn.get_deposit_receipt(dataset_submission.remote_url)
             statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)
             for state_uri, _state_desc in statement.states:
                 logger.info("Dataset has state URI: " + state_uri)
                 if state_uri in ERROR_STATES:
                     dataset_submission.status = 'error'
                     logger.info("URI: " + state_uri + " is an error state ... setting 'error' state on submission record")
                     # one error state is enough to flag the submission
                     break
             dataset_submission.last_accessed = datetime.datetime.now()
             dataset_submission.save()
             time.sleep(SwordStatementCheckThread.throttle)

         except urllib2.URLError as e:
             # if we get an exception, try again up to the limit
             retry_counter += 1
             exception = e
             logger.info("Got error connecting to the server ... retrying " + str(retry_counter) + " of " + str(SwordStatementCheckThread.retry_count))
             time.sleep(SwordStatementCheckThread.retry_delay)

         else:
             # if we don't get an exception, we're done
             return

     if exception is None:
         # The loop body never ran (retry_count allows no attempts), so
         # there is no error to re-raise; ``raise None`` would only
         # produce a confusing TypeError.
         logger.info("No check attempted: retry_count is " +
                     str(SwordStatementCheckThread.retry_count))
         return

     # if we don't return from the else statement above, it means the retries
     # all failed, and we have a problem.  Raise the last thrown exception.
     raise exception
 def test_33_get_ore_statement(self):
     """Deposit a zip package and fetch its ORE statement.

     The deposit is made on behalf of SSS_OBO.  The deposit receipt is
     re-fetched from its location, and the statement IRI it advertises
     must yield an Ore_Sword_Statement.
     """
     conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW, on_behalf_of=SSS_OBO)
     conn.get_service_document()
     col = conn.sd.workspaces[0][1][0]

     # No metadata entry is supplied: this is a package-only deposit.
     # The package is a zip archive, so it must be read as bytes: text
     # mode may translate line endings and corrupt the payload.
     with open(PACKAGE, "rb") as pkg:
         receipt = conn.create(col_iri=col.href,
                               payload=pkg,
                               mimetype=PACKAGE_MIME,
                               filename="example.zip",
                               packaging='http://purl.org/net/sword/package/SimpleZip')

     # ensure that we have a receipt (the server may not give us one
     # by default)
     edit_iri = receipt.location
     receipt = conn.get_deposit_receipt(edit_iri)

     assert receipt.ore_statement_iri is not None

     # get the statement
     statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

     assert isinstance(statement, Ore_Sword_Statement)
    def test_33_get_ore_statement(self):
        """Verify the contents of the statement for an updated deposit.

        An entry-only deposit is created and then updated with a zip
        package.  The resulting ORE/SWORD statement is checked for:

        1. one original deposit, whose URI contains "update.zip"
        2. one aggregated resource with that same URI
        3. the single state ZipFileAdded
        4. the title, abstract and identifier metadata from the entry
        """
        conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
        conn.get_service_document()
        col = conn.sd.workspaces[0][1][0]
        entry = Entry(title="An entry only deposit",
                      id="asidjasidj",
                      dcterms_abstract="abstract",
                      dcterms_identifier="http://whatever/")
        receipt = conn.create(col_iri=col.href, metadata_entry=entry)

        # The package is a zip archive, so it must be read as bytes: text
        # mode may translate line endings and corrupt the payload.
        with open(PACKAGE, "rb") as pkg:
            conn.update(
                dr=receipt,
                payload=pkg,
                mimetype=PACKAGE_MIME,
                filename="update.zip",
                packaging='http://purl.org/net/sword/package/SimpleZip')

        # ensure that we have a receipt (the server may not give us one
        # by default)
        receipt = conn.get_deposit_receipt(receipt.location)

        assert receipt.ore_statement_iri is not None

        # get the statement
        statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

        assert isinstance(statement, Ore_Sword_Statement)

        # check the original deposits
        od_uri = None
        assert len(statement.original_deposits) == 1
        for od in statement.original_deposits:
            assert "update.zip" in od.uri
            assert od.is_original_deposit
            assert od.deposited_on is not None
            # assert od.deposited_by == SSS_UN # FIXME: this may not work until we get auth sorted out
            assert od.deposited_on_behalf_of is None
            od_uri = od.uri

        # check the aggregated resources
        assert len(statement.resources) == 1
        for ar in statement.resources:
            # should be the same resource
            assert od_uri == ar.uri

        # check the states
        assert len(statement.states) == 1
        assert statement.states[0][
            0] == "http://databank.ox.ac.uk/state/ZipFileAdded"

        # single-argument print(...) behaves the same on Python 2 and 3
        print(etree.tostring(statement.dom, pretty_print=True))

        # check the metadata
        md_count = 0
        for description in statement.dom.findall(RDF + "Description"):
            # iterating an element yields its children
            for element in description:
                if element.tag == DC + "title":
                    assert element.text.strip() == "An entry only deposit"
                    md_count += 1
                elif element.tag == DC + "abstract":
                    assert element.text.strip() == "abstract"
                    md_count += 1
                elif element.tag == DC + "identifier":
                    resource = element.attrib.get(RDF + "resource", None)
                    if resource is not None:  # because we know that there is going to be more than one identifier
                        assert resource == "http://whatever/"
                        md_count += 1

        print("Metadata Count: " + str(md_count))
        assert md_count == 3
 def test_33_get_ore_statement(self):
     """Verify the contents of the statement for an updated deposit.

     An entry-only deposit is created and then updated with a zip
     package.  The resulting ORE/SWORD statement is checked for:

     1. one original deposit, whose URI contains "update.zip"
     2. one aggregated resource with that same URI
     3. the single state ZipFileAdded
     4. the title, abstract and identifier metadata from the entry
     """
     conn = Connection(SSS_URL, user_name=SSS_UN, user_pass=SSS_PW)
     conn.get_service_document()
     col = conn.sd.workspaces[0][1][0]
     entry = Entry(title="An entry only deposit",
                   id="asidjasidj",
                   dcterms_abstract="abstract",
                   dcterms_identifier="http://whatever/")
     receipt = conn.create(col_iri=col.href, metadata_entry=entry)

     # The package is a zip archive, so it must be read as bytes: text
     # mode may translate line endings and corrupt the payload.
     with open(PACKAGE, "rb") as pkg:
         conn.update(dr=receipt,
                     payload=pkg,
                     mimetype=PACKAGE_MIME,
                     filename="update.zip",
                     packaging='http://purl.org/net/sword/package/SimpleZip')

     # ensure that we have a receipt (the server may not give us one
     # by default)
     receipt = conn.get_deposit_receipt(receipt.location)

     assert receipt.ore_statement_iri is not None

     # get the statement
     statement = conn.get_ore_sword_statement(receipt.ore_statement_iri)

     assert isinstance(statement, Ore_Sword_Statement)

     # check the original deposits
     od_uri = None
     assert len(statement.original_deposits) == 1
     for od in statement.original_deposits:
         assert "update.zip" in od.uri
         assert od.is_original_deposit
         assert od.deposited_on is not None
         # assert od.deposited_by == SSS_UN # FIXME: this may not work until we get auth sorted out
         assert od.deposited_on_behalf_of is None
         od_uri = od.uri

     # check the aggregated resources
     assert len(statement.resources) == 1
     for ar in statement.resources:
         # should be the same resource
         assert od_uri == ar.uri

     # check the states
     assert len(statement.states) == 1
     assert statement.states[0][0] == "http://databank.ox.ac.uk/state/ZipFileAdded"

     # single-argument print(...) behaves the same on Python 2 and 3
     print(etree.tostring(statement.dom, pretty_print=True))

     # check the metadata
     md_count = 0
     for description in statement.dom.findall(RDF + "Description"):
         # iterating an element yields its children
         for element in description:
             if element.tag == DC + "title":
                 assert element.text.strip() == "An entry only deposit"
                 md_count += 1
             elif element.tag == DC + "abstract":
                 assert element.text.strip() == "abstract"
                 md_count += 1
             elif element.tag == DC + "identifier":
                 resource = element.attrib.get(RDF + "resource", None)
                 if resource is not None:  # because we know that there is going to be more than one identifier
                     assert resource == "http://whatever/"
                     md_count += 1

     print("Metadata Count: " + str(md_count))
     assert md_count == 3