コード例 #1
0
def parse_reingest_mets(transfer_uuid, transfer_path):
    """Load Dublin Core and rights metadata from a reingest METS file.

    The METS file is located with ``find_mets_file`` and parsed so that the
    information needed by later microservices can be extracted. If parsing
    fails, the problem is printed and logged and the function returns
    without doing anything else.

    :param transfer_uuid: Not used inside this function.
    :param transfer_path: Passed to ``find_mets_file`` to locate the METS.
    :return: None
    """
    mets_file = find_mets_file(transfer_path)
    try:
        mets_tree = etree.parse(mets_file)
    except Exception:
        # Best effort: an unreadable METS is reported and skipped.
        print('Error parsing reingest METS', mets_file, ' - skipping')
        logger.info('Error parsing reingest mets %s - skipping', mets_file, exc_info=True)
        return None

    # The SIP UUID is what remains of the METS file name once the 'METS.'
    # and '.xml' parts are stripped out.
    mets_name = os.path.basename(mets_file)
    sip_uuid = mets_name.replace('METS.', '').replace('.xml', '')

    # Note: DublinCore and PREMIS rights are not database-level foreign keys,
    # so this works even though the SIP may not exist yet.
    parse_mets_to_db.parse_dc(sip_uuid, mets_tree)
    parse_mets_to_db.parse_rights(sip_uuid, mets_tree)
コード例 #2
0
 def test_parse_multiple_rights(self):
     """ Only the most recent rights should be parsed. """
     sip_uuid = '50d65db1-86cd-4579-80af-8d9c0dbd7fca'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_updated_rights.xml')
     mets_tree = etree.parse(fixture)
     parsed = parse_mets_to_db.parse_rights(sip_uuid, mets_tree)
     assert parsed

     # Top-level rights statement with the Statute basis.
     statement = models.RightsStatement.objects.get(
         metadataappliestoidentifier=sip_uuid,
         rightsbasis='Statute',
     )
     assert statement.rightsstatementidentifiertype == ''
     assert statement.rightsstatementidentifiervalue == ''
     assert statement.rightsholder == 0
     assert statement.rightsbasis == 'Statute'
     assert statement.status == 'REINGEST'

     # Statute information attached to the statement.
     statute = models.RightsStatementStatuteInformation.objects.get(
         rightsstatement=statement)
     assert statute.statutejurisdiction == 'British Columbia, Canada'
     assert statute.statutecitation == 'Freedom of Information Act and Protection of Privacy Act'
     assert statute.statutedeterminationdate == '2015'
     assert statute.statuteapplicablestartdate == '2000'
     assert statute.statuteapplicableenddate is None
     assert statute.statuteenddateopen is True

     # Documentation identifier of the statute.
     doc_id = models.RightsStatementStatuteDocumentationIdentifier.objects.get(
         rightsstatementstatute=statute)
     assert doc_id.statutedocumentationidentifiertype == 'Doc'
     assert doc_id.statutedocumentationidentifiervalue == '1'
     assert doc_id.statutedocumentationidentifierrole == '-'

     # Note of the statute.
     statute_note = models.RightsStatementStatuteInformationNote.objects.get(
         rightsstatementstatute=statute)
     assert statute_note.statutenote == 'SIN and health numbers'

     # Rights granted, with its note and restriction.
     granted = models.RightsStatementRightsGranted.objects.get(
         rightsstatement=statement)
     assert granted.act == 'Disseminate'
     assert granted.startdate == '2000'
     assert granted.enddate is None
     assert granted.enddateopen is True
     granted_note = models.RightsStatementRightsGrantedNote.objects.get(
         rightsgranted=granted)
     assert granted_note.rightsgrantednote == ''
     granted_restriction = models.RightsStatementRightsGrantedRestriction.objects.get(
         rightsgranted=granted)
     assert granted_restriction.restriction == 'Disallow'
コード例 #3
0
 def test_parse_donor(self):
     """ It should parse donor rights. """
     sip_uuid = '50d65db1-86cd-4579-80af-8d9c0dbd7fca'
     root = etree.parse(os.path.join(THIS_DIR, 'fixtures', 'mets_all_rights.xml'))
     rights_list = parse_mets_to_db.parse_rights(sip_uuid, root)
     assert rights_list

     # Top-level rights statement with the Donor basis.
     rights = models.RightsStatement.objects.get(metadataappliestoidentifier=sip_uuid, rightsbasis='Donor')
     assert rights.rightsstatementidentifiertype == ''
     assert rights.rightsstatementidentifiervalue == ''
     assert rights.rightsholder == 0
     assert rights.rightsbasis == 'Donor'
     assert rights.status == 'REINGEST'

     # Donor rights are checked through the "other rights" models.
     other = models.RightsStatementOtherRightsInformation.objects.get(rightsstatement=rights)
     assert other.otherrightsbasis == 'Donor'
     assert other.otherrightsapplicablestartdate == '2000-01-01'
     assert other.otherrightsapplicableenddate == '2020-01-01'
     assert other.otherrightsenddateopen is False

     # Documentation identifier of the other-rights information.
     di = models.RightsStatementOtherRightsDocumentationIdentifier.objects.get(rightsstatementotherrights=other)
     assert di.otherrightsdocumentationidentifiertype == 'DID'
     assert di.otherrightsdocumentationidentifiervalue == '1'
     assert di.otherrightsdocumentationidentifierrole == '-'

     # Note of the other-rights information.
     note = models.RightsStatementOtherRightsInformationNote.objects.get(rightsstatementotherrights=other)
     assert note.otherrightsnote == 'Contact in 2010 for earlier'

     # Rights granted, with its note and restriction.
     rg = models.RightsStatementRightsGranted.objects.get(rightsstatement=rights)
     assert rg.act == 'Publish'
     assert rg.startdate == '2000-01-01'
     assert rg.enddate == '2020-01-01'
     assert rg.enddateopen is False
     rgnote = models.RightsStatementRightsGrantedNote.objects.get(rightsgranted=rg)
     assert rgnote.rightsgrantednote == ''
     rgrestriction = models.RightsStatementRightsGrantedRestriction.objects.get(rightsgranted=rg)
     assert rgrestriction.restriction == 'Conditional'
コード例 #4
0
 def test_parse_policy(self):
     """ It should parse policy rights. """
     sip_uuid = '50d65db1-86cd-4579-80af-8d9c0dbd7fca'
     root = etree.parse(os.path.join(THIS_DIR, 'fixtures', 'mets_all_rights.xml'))
     rights_list = parse_mets_to_db.parse_rights(sip_uuid, root)
     assert rights_list

     # Top-level rights statement with the Policy basis.
     rights = models.RightsStatement.objects.get(metadataappliestoidentifier=sip_uuid, rightsbasis='Policy')
     assert rights.rightsstatementidentifiertype == ''
     assert rights.rightsstatementidentifiervalue == ''
     assert rights.rightsholder == 0
     assert rights.rightsbasis == 'Policy'
     assert rights.status == 'REINGEST'

     # Policy rights are checked through the "other rights" models.
     other = models.RightsStatementOtherRightsInformation.objects.get(rightsstatement=rights)
     assert other.otherrightsbasis == 'Policy'
     assert other.otherrightsapplicablestartdate == '1989'
     assert other.otherrightsapplicableenddate is None
     assert other.otherrightsenddateopen is True

     # Documentation identifier of the other-rights information (all empty).
     di = models.RightsStatementOtherRightsDocumentationIdentifier.objects.get(rightsstatementotherrights=other)
     assert di.otherrightsdocumentationidentifiertype == ''
     assert di.otherrightsdocumentationidentifiervalue == ''
     assert di.otherrightsdocumentationidentifierrole == ''

     # Note of the other-rights information.
     note = models.RightsStatementOtherRightsInformationNote.objects.get(rightsstatementotherrights=other)
     # NOTE(review): 'Pubic' looks like a typo for 'Public'; the literal
     # presumably mirrors the fixture text - confirm before changing it.
     assert note.otherrightsnote == 'Pubic relations office only'

     # Rights granted, with its note and restriction.
     rg = models.RightsStatementRightsGranted.objects.get(rightsstatement=rights)
     assert rg.act == 'Disseminate'
     assert rg.startdate == '1989-01-01'
     assert rg.enddate is None
     assert rg.enddateopen is True
     rgnote = models.RightsStatementRightsGrantedNote.objects.get(rightsgranted=rg)
     assert rgnote.rightsgrantednote == ''
     rgrestriction = models.RightsStatementRightsGrantedRestriction.objects.get(rightsgranted=rg)
     assert rgrestriction.restriction == 'Conditional'
コード例 #5
0
 def test_parse_license(self):
     """ License rights in the METS should be parsed. """
     sip_uuid = '50d65db1-86cd-4579-80af-8d9c0dbd7fca'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_all_rights.xml')
     mets_tree = etree.parse(fixture)
     parsed = parse_mets_to_db.parse_rights(sip_uuid, mets_tree)
     assert parsed

     # Top-level rights statement with the License basis.
     statement = models.RightsStatement.objects.get(
         metadataappliestoidentifier=sip_uuid,
         rightsbasis='License',
     )
     assert statement.rightsstatementidentifiertype == ''
     assert statement.rightsstatementidentifiervalue == ''
     assert statement.rightsholder == 0
     assert statement.rightsbasis == 'License'
     assert statement.status == 'REINGEST'

     # License information attached to the statement.
     license_info = models.RightsStatementLicense.objects.get(
         rightsstatement=statement)
     assert license_info.licenseterms == 'CC-BY-SA'
     assert license_info.licenseapplicablestartdate == '2015'
     assert license_info.licenseapplicableenddate is None
     assert license_info.licenseenddateopen is True

     # Documentation identifier of the license (all empty).
     doc_id = models.RightsStatementLicenseDocumentationIdentifier.objects.get(
         rightsstatementlicense=license_info)
     assert doc_id.licensedocumentationidentifiertype == ''
     assert doc_id.licensedocumentationidentifiervalue == ''
     assert doc_id.licensedocumentationidentifierrole == ''

     # Note of the license.
     license_note = models.RightsStatementLicenseNote.objects.get(
         rightsstatementlicense=license_info)
     assert license_note.licensenote == ''

     # Rights granted, with its note and restriction.
     granted = models.RightsStatementRightsGranted.objects.get(
         rightsstatement=statement)
     assert granted.act == 'Disseminate'
     assert granted.startdate == '2015'
     assert granted.enddate is None
     assert granted.enddateopen is True
     granted_note = models.RightsStatementRightsGrantedNote.objects.get(
         rightsgranted=granted)
     assert granted_note.rightsgrantednote == 'Attribution required'
     granted_restriction = models.RightsStatementRightsGrantedRestriction.objects.get(
         rightsgranted=granted)
     assert granted_restriction.restriction == 'Allow'
コード例 #6
0
 def test_none_found(self):
     """ No rights should be parsed when the METS contains none. """
     sip_uuid = 'd481580e-53b9-4a52-96db-baa969e78adc'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_no_metadata.xml')
     mets_tree = etree.parse(fixture)
     parsed = parse_mets_to_db.parse_rights(sip_uuid, mets_tree)
     # The parser reports an empty list ...
     assert parsed == []
     # ... and no rights statement exists for this SIP UUID.
     statements = models.RightsStatement.objects.filter(
         metadataappliestoidentifier=sip_uuid)
     assert statements.exists() is False
コード例 #7
0
 def test_parse_copyright(self):
     """
     Copyright rights should be parsed.
     More than one rightsGranted on a statement should be parsed.
     """
     sip_uuid = '50d65db1-86cd-4579-80af-8d9c0dbd7fca'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_all_rights.xml')
     mets_tree = etree.parse(fixture)
     stub_job = Job("stub", "stub", [])
     parsed = parse_mets_to_db.parse_rights(stub_job, sip_uuid, mets_tree)
     assert parsed

     # Top-level rights statement with the Copyright basis.
     statement = models.RightsStatement.objects.get(
         metadataappliestoidentifier=sip_uuid,
         rightsbasis='Copyright',
     )
     assert statement.rightsstatementidentifiertype == ''
     assert statement.rightsstatementidentifiervalue == ''
     assert statement.rightsholder == 0
     assert statement.rightsbasis == 'Copyright'
     assert statement.status == 'REINGEST'

     # Copyright information attached to the statement.
     copyright_info = models.RightsStatementCopyright.objects.get(rightsstatement=statement)
     assert copyright_info.copyrightstatus == 'Under copyright'
     assert copyright_info.copyrightjurisdiction == 'CA'
     assert copyright_info.copyrightstatusdeterminationdate == '2015'
     assert copyright_info.copyrightapplicablestartdate == '1990'
     assert copyright_info.copyrightapplicableenddate is None
     assert copyright_info.copyrightenddateopen is True

     # Documentation identifier of the copyright (all empty).
     doc_id = models.RightsStatementCopyrightDocumentationIdentifier.objects.get(rightscopyright=copyright_info)
     assert doc_id.copyrightdocumentationidentifiertype == ''
     assert doc_id.copyrightdocumentationidentifiervalue == ''
     assert doc_id.copyrightdocumentationidentifierrole == ''

     # Note of the copyright.
     copyright_note = models.RightsStatementCopyrightNote.objects.get(rightscopyright=copyright_info)
     assert copyright_note.copyrightnote == 'Copyright expires 2010'

     # Two rightsGranted entries are expected for this statement.
     grants = models.RightsStatementRightsGranted.objects.filter(rightsstatement=statement)
     assert len(grants) == 2

     # First rightsGranted, with its note and restriction.
     first_grant = grants[0]
     assert first_grant.act == 'Disseminate'
     assert first_grant.startdate == '2000'
     assert first_grant.enddate is None
     assert first_grant.enddateopen is True
     first_note = models.RightsStatementRightsGrantedNote.objects.get(rightsgranted=first_grant)
     assert first_note.rightsgrantednote == 'Attribution required'
     first_restriction = models.RightsStatementRightsGrantedRestriction.objects.get(rightsgranted=first_grant)
     assert first_restriction.restriction == 'Allow'

     # Second rightsGranted, with its note and restriction.
     second_grant = grants[1]
     assert second_grant.act == 'Access'
     assert second_grant.startdate == '1999'
     assert second_grant.enddate is None
     assert second_grant.enddateopen is True
     second_note = models.RightsStatementRightsGrantedNote.objects.get(rightsgranted=second_grant)
     assert second_note.rightsgrantednote == 'Access one year before dissemination'
     second_restriction = models.RightsStatementRightsGrantedRestriction.objects.get(rightsgranted=second_grant)
     assert second_restriction.restriction == 'Allow'
コード例 #8
0
 def test_parse_statute(self):
     """ Statute rights in the METS should be parsed. """
     sip_uuid = '50d65db1-86cd-4579-80af-8d9c0dbd7fca'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_all_rights.xml')
     mets_tree = etree.parse(fixture)
     stub_job = Job("stub", "stub", [])
     parsed = parse_mets_to_db.parse_rights(stub_job, sip_uuid, mets_tree)
     assert parsed

     # Top-level rights statement with the Statute basis.
     statement = models.RightsStatement.objects.get(
         metadataappliestoidentifier=sip_uuid,
         rightsbasis='Statute',
     )
     assert statement.rightsstatementidentifiertype == ''
     assert statement.rightsstatementidentifiervalue == ''
     assert statement.rightsholder == 0
     assert statement.rightsbasis == 'Statute'
     assert statement.status == 'REINGEST'

     # Statute information attached to the statement.
     statute = models.RightsStatementStatuteInformation.objects.get(rightsstatement=statement)
     assert statute.statutejurisdiction == 'BC, Canada'
     assert statute.statutecitation == 'Freedom of Information Act'
     assert statute.statutedeterminationdate == '2011'
     assert statute.statuteapplicablestartdate == '1994'
     assert statute.statuteapplicableenddate == '2094'
     assert statute.statuteenddateopen is False

     # Documentation identifier of the statute (all empty).
     doc_id = models.RightsStatementStatuteDocumentationIdentifier.objects.get(rightsstatementstatute=statute)
     assert doc_id.statutedocumentationidentifiertype == ''
     assert doc_id.statutedocumentationidentifiervalue == ''
     assert doc_id.statutedocumentationidentifierrole == ''

     # Note of the statute.
     statute_note = models.RightsStatementStatuteInformationNote.objects.get(rightsstatementstatute=statute)
     assert statute_note.statutenote == 'SIN & health numbers'

     # Rights granted, with its note and restriction.
     granted = models.RightsStatementRightsGranted.objects.get(rightsstatement=statement)
     assert granted.act == 'Disseminate'
     assert granted.startdate == '1994'
     assert granted.enddate == '2094'
     assert granted.enddateopen is False
     granted_note = models.RightsStatementRightsGrantedNote.objects.get(rightsgranted=granted)
     assert granted_note.rightsgrantednote == ''
     granted_restriction = models.RightsStatementRightsGrantedRestriction.objects.get(rightsgranted=granted)
     assert granted_restriction.restriction == 'Disallow'