def main(job, sip_uuid, dc_path):
    """Load Dublin Core metadata from a JSON file into a ``DublinCore`` row.

    A missing file is not an error: a notice is printed through
    ``job.pyprint`` and nothing else happens. Otherwise every key/value pair
    of the decoded JSON is set as an attribute on a new
    ``models.DublinCore`` object, the object is saved, and the JSON file is
    removed on a best-effort basis.

    :param job: object exposing ``pyprint``; all output goes through it.
    :param sip_uuid: value stored as ``metadataappliestoidentifier``.
    :param dc_path: path of the JSON file to read.
    :return: 0 on every non-raising path.
    """
    if os.path.exists(dc_path):
        job.pyprint("Loading DC metadata from", dc_path)
        with open(dc_path) as handle:
            payload = json.load(handle)

        record = models.DublinCore(
            metadataappliestoidentifier=sip_uuid,
            metadataappliestotype_id=INGEST_METADATA_TYPE,
        )
        for attr_name, attr_value in payload.items():
            try:
                setattr(record, attr_name, attr_value)
            except AttributeError:
                job.pyprint("Invalid DC attribute:", attr_name, file=sys.stderr)
        record.save()

        # The JSON file was only copied here so it could be read above; it
        # is no longer needed. A failed removal is reported, not raised.
        try:
            job.pyprint('Removing "dc.json":', dc_path)
            os.remove(dc_path)
        except Exception as err:
            job.pyprint('Unable to remove "dc.json":', err)
        return 0

    # No metadata file: report it and carry on.
    job.pyprint("DC metadata not found; exiting", "(at", dc_path + ")")
    return 0
def parse_dc(sip_uuid, root):
    """
    Parse SIP-level DublinCore metadata into the DublinCore table.

    Deletes existing entries associated with this SIP.

    :param str sip_uuid: UUID of the SIP to parse the metadata for.
    :param root: root Element of the METS file.
    :return: DublinCore DB object, or None when the METS has no DC dmdSec,
        no "objects" directory div, no DMDID on that div, or no DC dmdSec
        referenced by that DMDID.
    """
    # Delete existing DC
    models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=MD_TYPE_SIP_ID,
    ).delete()

    # Parse DC
    dmds = root.xpath(
        'mets:dmdSec/mets:mdWrap[@MDTYPE="DC"]/parent::*', namespaces=ns.NSMAP
    )
    if not dmds:
        return None

    # Maps the element's local tag name to the model attribute name.
    DC_TERMS_MATCHING = {
        'title': 'title',
        'creator': 'creator',
        'subject': 'subject',
        'description': 'description',
        'publisher': 'publisher',
        'contributor': 'contributor',
        'date': 'date',
        'type': 'type',
        'format': 'format',
        'identifier': 'identifier',
        'source': 'source',
        'relation': 'relation',
        'language': 'language',
        'coverage': 'coverage',
        'rights': 'rights',
        'isPartOf': 'is_part_of',
    }

    # Want most recently updated. A dmdSec without CREATED sorts as oldest
    # instead of making the sort compare None against a string.
    dmds = sorted(dmds, key=lambda e: e.get('CREATED') or '')

    # Only want SIP DC, not file DC
    div = root.find(
        'mets:structMap/mets:div/mets:div[@TYPE="Directory"][@LABEL="objects"]',
        namespaces=ns.NSMAP,
    )
    if div is None:
        # No "objects" directory div, so there is no SIP-level DMDID to follow.
        return None
    dmdids = div.get('DMDID')
    # No SIP DC
    if dmdids is None:
        return None
    dmdids = dmdids.split()

    dc_xml = None
    for dmd in dmds[::-1]:  # Reversed: newest first
        if dmd.get('ID') in dmdids:
            dc_xml = dmd.find(
                'mets:mdWrap/mets:xmlData/dcterms:dublincore',
                namespaces=ns.NSMAP,
            )
            break
    if dc_xml is None:
        # Either no DC dmdSec is referenced by the objects div, or the
        # referenced one has no dcterms:dublincore payload.
        return None

    dc_model = models.DublinCore(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=MD_TYPE_SIP_ID,
        status=models.METADATA_STATUS_REINGEST,
    )
    print('Dublin Core:')
    for elem in dc_xml:
        tag = elem.tag.replace(ns.dctermsBNS, '', 1).replace(ns.dcBNS, '', 1)
        print(tag, elem.text)
        # Empty elements have text None; leave the model attribute untouched.
        if elem.text is not None:
            setattr(dc_model, DC_TERMS_MATCHING[tag], elem.text)
    dc_model.save()
    return dc_model
def transfer_metadata_edit(request, uuid, id=None):
    """Render or process the Dublin Core metadata form for a transfer.

    With ``id`` the matching ``DublinCore`` row is edited. Without it, an
    existing row for this transfer triggers a redirect to its edit URL;
    if none exists, an unsaved object is bound to the form.

    A valid POST saves the form and redirects to the metadata list.

    NOTE: the template context is ``locals()``, so the local variable names
    in this function are part of what the template receives.
    """
    if not id:
        try:
            dc = models.DublinCore.objects.get(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier__exact=uuid,
            )
        except models.DublinCore.DoesNotExist:
            # Nothing stored yet: start from an unsaved object.
            dc = models.DublinCore(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier=uuid,
            )
        else:
            # A row already exists; continue on its id-specific edit URL.
            return redirect("transfer:transfer_metadata_edit", uuid, dc.id)
    else:
        dc = models.DublinCore.objects.get(pk=id)

    if request.method != "POST":
        form = DublinCoreMetadataForm(instance=dc)
    else:
        form = DublinCoreMetadataForm(request.POST, instance=dc)
        if form.is_valid():
            dc = form.save()
            return redirect("transfer:transfer_metadata_list", uuid)

    jobs = models.Job.objects.filter(sipuuid=uuid)
    name = jobs.get_directory_name()
    return render(request, "transfer/metadata_edit.html", locals())
def main(sip_uuid, dc_path):
    """Load Dublin Core metadata from a JSON file into a ``DublinCore`` row.

    A missing file is not an error: a notice is printed and nothing else
    happens. Otherwise every key/value pair of the decoded JSON is set as an
    attribute on a new ``models.DublinCore`` object, which is then saved.

    :param sip_uuid: value stored as ``metadataappliestoidentifier``.
    :param dc_path: path of the JSON file to read.
    :return: 0 on every non-raising path.
    """
    if os.path.exists(dc_path):
        print("Loading DC metadata from", dc_path)
        with open(dc_path) as handle:
            payload = json.load(handle)

        record = models.DublinCore(
            metadataappliestoidentifier=sip_uuid,
            metadataappliestotype_id=INGEST_METADATA_TYPE,
        )
        for attr_name, attr_value in payload.items():
            try:
                setattr(record, attr_name, attr_value)
            except AttributeError:
                print("Invalid DC attribute:", attr_name, file=sys.stderr)
        record.save()
        return 0

    # No metadata file: report it and carry on.
    print("DC metadata not found; exiting", "(at", dc_path + ")")
    return 0
def transfer_metadata_edit(request, uuid, id=None):
    """Render or process the Dublin Core metadata form for a transfer.

    With ``id`` the matching ``DublinCore`` row is edited. Without it, an
    existing row for this transfer triggers a redirect to its edit URL;
    if none exists, an unsaved object is used.

    On a valid POST, each listed field present in the form's cleaned data
    is copied onto the object, which is saved before redirecting to the
    metadata list. On other methods the form is pre-filled from the object.

    NOTE: the template context is ``locals()``, so the local variable names
    in this function are part of what the template receives.
    """
    if not id:
        try:
            dc = models.DublinCore.objects.get(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier__exact=uuid)
        except models.DublinCore.DoesNotExist:
            # Nothing stored yet: start from an unsaved object.
            dc = models.DublinCore(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier=uuid)
        else:
            # A row already exists; continue on its id-specific edit URL.
            return redirect('components.transfer.views.transfer_metadata_edit', uuid, dc.id)
    else:
        dc = models.DublinCore.objects.get(pk=id)

    fields = [
        'title', 'creator', 'subject', 'description', 'publisher',
        'contributor', 'date', 'type', 'format', 'identifier',
        'source', 'relation', 'language', 'coverage', 'rights'
    ]

    if request.method != 'POST':
        # Pre-fill the unbound form with the object's current values.
        initial = {}
        for item in fields:
            initial[item] = getattr(dc, item)
        form = DublinCoreMetadataForm(initial=initial)
    else:
        form = DublinCoreMetadataForm(request.POST)
        if form.is_valid():
            for item in fields:
                if item in form.cleaned_data:
                    setattr(dc, item, form.cleaned_data[item])
            dc.save()
            return redirect('components.transfer.views.transfer_metadata_list', uuid)

    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    name = utils.get_directory_name_from_job(jobs)
    return render(request, 'transfer/metadata_edit.html', locals())
def ingest_metadata_edit(request, uuid, id=None):
    """Render or process the Dublin Core metadata form for an ingest (SIP).

    With ``id`` the matching ``DublinCore`` row is edited. Without it, an
    existing SIP-level row triggers a redirect to its edit URL; if none
    exists, an unsaved object is used.

    On a valid POST, each listed field present in the form's cleaned data
    is copied onto the object, which is saved before redirecting to the
    metadata list. On other methods the form is pre-filled from the object.

    NOTE: the template context is ``locals()``, so the local variable names
    in this function are part of what the template receives.
    """
    if id:
        dc = models.DublinCore.objects.get(pk=id)
    else:
        # Right now we only support linking metadata to the Ingest
        try:
            dc = models.DublinCore.objects.get_sip_metadata(uuid)
            return HttpResponseRedirect(
                reverse('components.ingest.views.ingest_metadata_edit', args=[uuid, dc.id]))
        except ObjectDoesNotExist:
            dc = models.DublinCore(
                metadataappliestotype=ingest_sip_metadata_type_id(),
                metadataappliestoidentifier=uuid)

    fields = [
        'title', 'creator', 'subject', 'description', 'publisher',
        'contributor', 'date', 'type', 'format', 'identifier',
        'source', 'relation', 'language', 'coverage', 'rights'
    ]

    if request.method == 'POST':
        form = DublinCoreMetadataForm(request.POST)
        if form.is_valid():
            for item in fields:
                # A listed field the form does not carry must not raise
                # KeyError; leave the object's current value in place.
                if item not in form.cleaned_data:
                    continue
                setattr(dc, item, form.cleaned_data[item])
            dc.save()
            return HttpResponseRedirect(
                reverse('components.ingest.views.ingest_metadata_list', args=[uuid]))
    else:
        # Pre-fill the unbound form with the object's current values.
        initial = {}
        for item in fields:
            initial[item] = getattr(dc, item)
        form = DublinCoreMetadataForm(initial=initial)

    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    name = utils.get_directory_name(jobs[0])

    return render(request, 'ingest/metadata_edit.html', locals())
def ingest_metadata_edit(request, uuid, id=None):
    """Render or process the Dublin Core metadata form for an ingest (SIP).

    With ``id`` the matching ``DublinCore`` row is edited. Without it, the
    SIP-level row for ``uuid`` is looked up and, if missing, an unsaved
    object is created in memory.

    The form class and the value written to ``dc.type`` depend on whether
    the SIP's ``sip_type`` contains ``'AIC'``. A valid form is saved and the
    view redirects to the metadata list.

    NOTE: the template context is ``locals()``, so the local variable names
    in this function (including ``id``) are part of what the template
    receives.
    """
    if not id:
        # No DC id given: look for the SIP's metadata by UUID. get_or_create
        # is deliberately not used because it would save an empty object
        # even if the form is never submitted.
        sip_type_id = ingest_sip_metadata_type_id()
        try:
            dc = models.DublinCore.objects.get(
                metadataappliestotype=sip_type_id,
                metadataappliestoidentifier=uuid)
        except models.DublinCore.DoesNotExist:
            dc = models.DublinCore(
                metadataappliestotype=sip_type_id,
                metadataappliestoidentifier=uuid)
        else:
            id = dc.id
    else:
        # The DC object's id is known - plain edit.
        dc = models.DublinCore.objects.get(pk=id)

    # AICs get their own metadata form and type label.
    if 'AIC' not in models.SIP.objects.get(uuid=uuid).sip_type:
        dc_type = "Archival Information Package"
        form = ingest_forms.DublinCoreMetadataForm(request.POST or None, instance=dc)
    else:
        dc_type = "Archival Information Collection"
        form = ingest_forms.AICDublinCoreMetadataForm(request.POST or None, instance=dc)

    if form.is_valid():
        dc = form.save()
        dc.type = dc_type
        dc.save()
        return redirect('components.ingest.views.ingest_metadata_list', uuid)

    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    name = utils.get_directory_name_from_job(jobs[0])

    return render(request, 'ingest/metadata_edit.html', locals())
def aic_metadata_add(request, uuid):
    """Collect Dublin Core metadata for an AIC and queue it for creation.

    The SIP-level ``DublinCore`` row for ``uuid`` is loaded, or an unsaved
    one is created in memory. While the form is not valid the edit template
    is rendered. Once valid, the metadata is saved with type
    "Archival Information Collection", the SIP's ``currentpath`` is updated,
    and the directory ``<shared>/tmp/<uuid>`` is moved into the
    ``system/createAIC`` folder of the watched directory.

    NOTE: the template context is ``locals()``, so the local variable names
    bound on the render path (including ``id``) are part of what the
    template receives.
    """
    sip_type_id = ingest_sip_metadata_type_id()
    try:
        dc = models.DublinCore.objects.get(
            metadataappliestotype=sip_type_id,
            metadataappliestoidentifier=uuid)
    except models.DublinCore.DoesNotExist:
        dc = models.DublinCore(
            metadataappliestotype=sip_type_id,
            metadataappliestoidentifier=uuid)
    else:
        id = dc.id

    form = ingest_forms.AICDublinCoreMetadataForm(request.POST or None, instance=dc)

    if not form.is_valid():
        # Nothing submitted yet, or validation failed: show the form.
        name = dc.title or "New AIC"
        aic = True
        return render(request, 'ingest/metadata_edit.html', locals())

    # Save the metadata
    dc = form.save()
    dc.type = "Archival Information Collection"
    dc.save()

    # Hand the AIC over to the createAIC watched directory.
    shared_root = django_settings.SHARED_DIRECTORY
    staged_dir = os.path.join(shared_root, 'tmp', uuid)
    watch_root = django_settings.WATCH_DIRECTORY
    label = slugify(dc.title or dc.identifier).replace('-', '_')
    target_name = '{name}-{uuid}'.format(name=label, uuid=uuid)
    target_dir = os.path.join(watch_root, 'system', 'createAIC', target_name)
    # The stored path replaces the shared directory with a placeholder.
    stored_path = target_dir.replace(shared_root, '%sharedPath%') + '/'
    models.SIP.objects.filter(uuid=uuid).update(currentpath=stored_path)
    shutil.move(staged_dir, target_dir)
    return redirect('ingest_index')
def parse_dc(job, sip_uuid, root):
    """
    Parse SIP-level DublinCore metadata into the DublinCore table.

    Deletes existing entries associated with this SIP.

    :param job: object exposing ``pyprint``, used for progress output.
    :param str sip_uuid: UUID of the SIP to parse the metadata for.
    :param root: root Element of the METS file.
    :return: DublinCore DB object, or None when the METS has no DC dmdSec,
        no "objects" directory div, no DMDID on that div, or no DC dmdSec
        referenced by that DMDID.
    """
    # Delete existing DC
    models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid, metadataappliestotype_id=MD_TYPE_SIP_ID
    ).delete()

    # Parse DC
    dmds = root.xpath(
        'mets:dmdSec/mets:mdWrap[@MDTYPE="DC"]/parent::*', namespaces=ns.NSMAP
    )
    if not dmds:
        return None

    # Maps the element's local tag name to the model attribute name.
    DC_TERMS_MATCHING = {
        "title": "title",
        "creator": "creator",
        "subject": "subject",
        "description": "description",
        "publisher": "publisher",
        "contributor": "contributor",
        "date": "date",
        "type": "type",
        "format": "format",
        "identifier": "identifier",
        "source": "source",
        "relation": "relation",
        "language": "language",
        "coverage": "coverage",
        "rights": "rights",
        "isPartOf": "is_part_of",
    }

    # Want most recently updated. A dmdSec without CREATED sorts as oldest
    # instead of making the sort compare None against a string.
    dmds = sorted(dmds, key=lambda e: e.get("CREATED") or "")

    # Only want SIP DC, not file DC
    div = root.find(
        'mets:structMap/mets:div/mets:div[@TYPE="Directory"][@LABEL="objects"]',
        namespaces=ns.NSMAP,
    )
    if div is None:
        # No "objects" directory div, so there is no SIP-level DMDID to follow.
        return None
    dmdids = div.get("DMDID")
    # No SIP DC
    if dmdids is None:
        return None
    dmdids = dmdids.split()

    dc_xml = None
    for dmd in dmds[::-1]:  # Reversed: newest first
        if dmd.get("ID") in dmdids:
            dc_xml = dmd.find(
                "mets:mdWrap/mets:xmlData/dcterms:dublincore", namespaces=ns.NSMAP
            )
            break
    if dc_xml is None:
        # Either no DC dmdSec is referenced by the objects div, or the
        # referenced one has no dcterms:dublincore payload.
        return None

    dc_model = models.DublinCore(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=MD_TYPE_SIP_ID,
        status=models.METADATA_STATUS_REINGEST,
    )
    job.pyprint("Dublin Core:")
    for elem in dc_xml:
        tag = elem.tag.replace(ns.dctermsBNS, "", 1).replace(ns.dcBNS, "", 1)
        job.pyprint(tag, elem.text)
        # Empty elements have text None; leave the model attribute untouched.
        if elem.text is not None:
            setattr(dc_model, DC_TERMS_MATCHING[tag], elem.text)
    dc_model.save()
    return dc_model