Esempio n. 1
0
def main(job, sip_uuid, dc_path):
    """Load Dublin Core metadata from a JSON file into the database.

    Reads the JSON document at ``dc_path``, copies each of its top-level
    entries onto a new ``models.DublinCore`` object associated with
    ``sip_uuid``, saves it, and finally tries to delete the JSON file.

    :param job: object whose ``pyprint`` method receives progress messages.
    :param sip_uuid: value stored as ``metadataappliestoidentifier``.
    :param dc_path: path of the JSON file to load.
    :return: always 0, including when ``dc_path`` does not exist.
    """
    # A missing file is not a failure; there is simply nothing to import.
    if not os.path.exists(dc_path):
        job.pyprint("DC metadata not found; exiting", "(at", dc_path + ")")
        return 0

    job.pyprint("Loading DC metadata from", dc_path)
    with open(dc_path) as handle:
        metadata = json.load(handle)

    record = models.DublinCore(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=INGEST_METADATA_TYPE,
    )
    for field_name, field_value in metadata.items():
        try:
            setattr(record, field_name, field_value)
        except AttributeError:
            job.pyprint("Invalid DC attribute:", field_name, file=sys.stderr)
    record.save()

    # The JSON file was only copied to ingest so it could be read above.
    # Deleting it is best-effort: a failure is reported but does not change
    # the return value.
    try:
        job.pyprint('Removing "dc.json":', dc_path)
        os.remove(dc_path)
    except Exception as exc:
        job.pyprint('Unable to remove "dc.json":', exc)

    return 0
def parse_dc(sip_uuid, root):
    """
    Parse SIP-level DublinCore metadata into the DublinCore table.

    Deletes existing entries associated with this SIP.

    Of the DC dmdSecs referenced by the ``objects`` directory div, the one
    sorting last by its ``CREATED`` attribute is used. ``None`` is returned
    (and nothing is saved) when the METS has no DC dmdSec, no ``objects``
    div, no ``DMDID`` on that div, no referenced DC dmdSec, or no
    ``dcterms:dublincore`` element inside the selected dmdSec.

    :param str sip_uuid: UUID of the SIP to parse the metadata for.
    :param root: root Element of the METS file.
    :return: DublinCore DB object, or None
    """
    # Delete existing DC
    models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid, metadataappliestotype_id=MD_TYPE_SIP_ID).delete()
    # Parse DC
    dmds = root.xpath('mets:dmdSec/mets:mdWrap[@MDTYPE="DC"]/parent::*', namespaces=ns.NSMAP)
    if not dmds:
        return None

    # Maps the local name of a DC element to the model attribute it fills.
    DC_TERMS_MATCHING = {
        'title': 'title',
        'creator': 'creator',
        'subject': 'subject',
        'description': 'description',
        'publisher': 'publisher',
        'contributor': 'contributor',
        'date': 'date',
        'type': 'type',
        'format': 'format',
        'identifier': 'identifier',
        'source': 'source',
        'relation': 'relation',
        'language': 'language',
        'coverage': 'coverage',
        'rights': 'rights',
        'isPartOf': 'is_part_of',
    }

    # Only want SIP DC, not file DC
    div = root.find('mets:structMap/mets:div/mets:div[@TYPE="Directory"][@LABEL="objects"]', namespaces=ns.NSMAP)
    if div is None:
        # No objects directory in the structMap, hence no SIP-level DC.
        return None
    dmdids = div.get('DMDID')
    # No SIP DC
    if dmdids is None:
        return None
    dmdids = dmdids.split()

    # Want most recently updated.  A dmdSec without CREATED sorts first;
    # the empty-string fallback avoids comparing None with str.
    dmds = sorted(dmds, key=lambda e: e.get('CREATED') or '')
    dc_xml = None
    for dmd in reversed(dmds):
        if dmd.get('ID') in dmdids:
            dc_xml = dmd.find('mets:mdWrap/mets:xmlData/dcterms:dublincore', namespaces=ns.NSMAP)
            break
    if dc_xml is None:
        # Either no DC dmdSec is referenced by the objects div, or the
        # selected one has no dublincore payload.
        return None

    dc_model = models.DublinCore(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=MD_TYPE_SIP_ID,
        status=models.METADATA_STATUS_REINGEST,
    )
    print('Dublin Core:')
    for elem in dc_xml:
        tag = elem.tag.replace(ns.dctermsBNS, '', 1).replace(ns.dcBNS, '', 1)
        print(tag, elem.text)
        field = DC_TERMS_MATCHING.get(tag)
        if field is None:
            # Term without a matching model attribute: report and skip it
            # instead of aborting the whole parse with a KeyError.
            print('Skipping unsupported Dublin Core term:', tag)
            continue
        if elem.text is not None:
            # Empty elements leave the model's default value untouched.
            setattr(dc_model, field, elem.text)
    dc_model.save()
    return dc_model
Esempio n. 3
0
def transfer_metadata_edit(request, uuid, id=None):
    """Show and process the Dublin Core metadata form for a transfer.

    When ``id`` is given, that DublinCore object is edited. Otherwise the
    object already associated with the transfer is looked up and the request
    is redirected to its edit URL; if none exists, a new unsaved object is
    used so nothing is stored until the form is submitted.

    :param request: the current HTTP request.
    :param uuid: UUID of the transfer the metadata applies to.
    :param id: primary key of an existing DublinCore object, or None.
    :return: a redirect after a successful save or lookup, otherwise the
        rendered ``transfer/metadata_edit.html`` page.
    """
    if id:
        dc = models.DublinCore.objects.get(pk=id)
    else:
        try:
            dc = models.DublinCore.objects.get(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier__exact=uuid,
            )
        except models.DublinCore.DoesNotExist:
            dc = models.DublinCore(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier=uuid,
            )
        else:
            return redirect("transfer:transfer_metadata_edit", uuid, dc.id)

    if request.method == "POST":
        form = DublinCoreMetadataForm(request.POST, instance=dc)
        if form.is_valid():
            dc = form.save()
            return redirect("transfer:transfer_metadata_list", uuid)
    else:
        form = DublinCoreMetadataForm(instance=dc)

    # Computed for every render, not only for GET, so that re-displaying an
    # invalid POST still provides ``jobs`` and ``name`` to the template.
    jobs = models.Job.objects.filter(sipuuid=uuid)
    name = jobs.get_directory_name()

    # NOTE: the template context is the local namespace, so local variable
    # names in this function are part of the template's interface.
    return render(request, "transfer/metadata_edit.html", locals())
def main(sip_uuid, dc_path):
    """Load Dublin Core metadata from a JSON file into the database.

    Reads the JSON document at ``dc_path`` and copies each of its top-level
    entries onto a new ``models.DublinCore`` object associated with
    ``sip_uuid``, which is then saved.

    :param sip_uuid: value stored as ``metadataappliestoidentifier``.
    :param dc_path: path of the JSON file to load.
    :return: always 0, including when ``dc_path`` does not exist.
    """
    # A missing file is not a failure; there is simply nothing to import.
    if not os.path.exists(dc_path):
        print("DC metadata not found; exiting", "(at", dc_path + ")")
        return 0

    print("Loading DC metadata from", dc_path)
    with open(dc_path) as handle:
        metadata = json.load(handle)

    record = models.DublinCore(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=INGEST_METADATA_TYPE,
    )
    for field_name, field_value in metadata.items():
        try:
            setattr(record, field_name, field_value)
        except AttributeError:
            print("Invalid DC attribute:", field_name, file=sys.stderr)

    record.save()
    return 0
Esempio n. 5
0
def transfer_metadata_edit(request, uuid, id=None):
    """Show and process the Dublin Core metadata form for a transfer.

    When ``id`` is given, that DublinCore object is edited. Otherwise the
    object already associated with the transfer is looked up and the request
    is redirected to its edit URL; if none exists, a new unsaved object is
    used so nothing is stored until the form is submitted.

    :param request: the current HTTP request.
    :param uuid: UUID of the transfer the metadata applies to.
    :param id: primary key of an existing DublinCore object, or None.
    :return: a redirect after a successful save or lookup, otherwise the
        rendered ``transfer/metadata_edit.html`` page.
    """
    if id:
        dc = models.DublinCore.objects.get(pk=id)
    else:
        try:
            dc = models.DublinCore.objects.get(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier__exact=uuid)
        except models.DublinCore.DoesNotExist:
            dc = models.DublinCore(
                metadataappliestotype=transfer_metadata_type_id(),
                metadataappliestoidentifier=uuid)
        else:
            return redirect('components.transfer.views.transfer_metadata_edit',
                            uuid, dc.id)

    # Model attributes that are exchanged with the form.
    fields = [
        'title', 'creator', 'subject', 'description', 'publisher',
        'contributor', 'date', 'type', 'format', 'identifier', 'source',
        'relation', 'language', 'coverage', 'rights'
    ]

    if request.method == 'POST':
        form = DublinCoreMetadataForm(request.POST)
        if form.is_valid():
            for item in fields:
                # Only copy the fields the form actually cleaned.
                if item in form.cleaned_data:
                    setattr(dc, item, form.cleaned_data[item])
            dc.save()
            return redirect('components.transfer.views.transfer_metadata_list',
                            uuid)
    else:
        initial = {item: getattr(dc, item) for item in fields}
        form = DublinCoreMetadataForm(initial=initial)

    # Computed for every render, not only for GET, so that re-displaying an
    # invalid POST still provides ``jobs`` and ``name`` to the template.
    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    name = utils.get_directory_name_from_job(jobs)

    # NOTE: the template context is the local namespace, so local variable
    # names in this function are part of the template's interface.
    return render(request, 'transfer/metadata_edit.html', locals())
Esempio n. 6
0
def ingest_metadata_edit(request, uuid, id=None):
    """Show and process the Dublin Core metadata form for a SIP in ingest.

    When ``id`` is given, that DublinCore object is edited. Otherwise the
    SIP's existing metadata is looked up and the request is redirected to
    its edit URL; if none exists, a new unsaved object is used so nothing
    is stored until the form is submitted.

    :param request: the current HTTP request.
    :param uuid: UUID of the SIP the metadata applies to.
    :param id: primary key of an existing DublinCore object, or None.
    :return: a redirect after a successful save or lookup, otherwise the
        rendered ``ingest/metadata_edit.html`` page.
    """
    if id:
        dc = models.DublinCore.objects.get(pk=id)
    else:
        # Right now we only support linking metadata to the Ingest
        try:
            dc = models.DublinCore.objects.get_sip_metadata(uuid)
        except ObjectDoesNotExist:
            dc = models.DublinCore(
                metadataappliestotype=ingest_sip_metadata_type_id(),
                metadataappliestoidentifier=uuid)
        else:
            return HttpResponseRedirect(
                reverse('components.ingest.views.ingest_metadata_edit',
                        args=[uuid, dc.id]))

    # Model attributes that are exchanged with the form.
    fields = [
        'title', 'creator', 'subject', 'description', 'publisher',
        'contributor', 'date', 'type', 'format', 'identifier', 'source',
        'relation', 'language', 'coverage', 'rights'
    ]

    if request.method == 'POST':
        form = DublinCoreMetadataForm(request.POST)
        if form.is_valid():
            for item in fields:
                setattr(dc, item, form.cleaned_data[item])
            dc.save()
            return HttpResponseRedirect(
                reverse('components.ingest.views.ingest_metadata_list',
                        args=[uuid]))
    else:
        initial = {item: getattr(dc, item) for item in fields}
        form = DublinCoreMetadataForm(initial=initial)

    # Computed for every render, not only for GET, so that re-displaying an
    # invalid POST still provides ``jobs`` and ``name`` to the template.
    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    name = utils.get_directory_name(jobs[0])

    # NOTE: the template context is the local namespace, so local variable
    # names in this function are part of the template's interface.
    return render(request, 'ingest/metadata_edit.html', locals())
Esempio n. 7
0
def ingest_metadata_edit(request, uuid, id=None):
    """Show and process the Dublin Core metadata form for a SIP in ingest.

    The form class and the value stored in the ``type`` field depend on
    whether the SIP's ``sip_type`` contains ``'AIC'``.

    :param request: the current HTTP request.
    :param uuid: UUID of the SIP the metadata applies to.
    :param id: primary key of an existing DublinCore object, or None.
    :return: a redirect to the metadata list after a valid submission,
        otherwise the rendered ``ingest/metadata_edit.html`` page.
    """
    # Local names are kept as-is on purpose: ``locals()`` is the template
    # context, so renaming a variable would change what the template sees.
    if id:
        # Edit: the caller told us exactly which DC object to use.
        dc = models.DublinCore.objects.get(pk=id)
    else:
        # Find the DC object attached to this SIP, or prepare a new one.
        # get_or_create is avoided because it would save an empty object
        # even when the form is never submitted.
        sip_type_id = ingest_sip_metadata_type_id()
        try:
            dc = models.DublinCore.objects.get(
                metadataappliestotype=sip_type_id,
                metadataappliestoidentifier=uuid)
        except models.DublinCore.DoesNotExist:
            dc = models.DublinCore(
                metadataappliestotype=sip_type_id,
                metadataappliestoidentifier=uuid)
        else:
            id = dc.id

    # AICs get their own form; everything else uses the regular one.
    if 'AIC' not in models.SIP.objects.get(uuid=uuid).sip_type:
        form = ingest_forms.DublinCoreMetadataForm(
            request.POST or None, instance=dc)
        dc_type = "Archival Information Package"
    else:
        form = ingest_forms.AICDublinCoreMetadataForm(
            request.POST or None, instance=dc)
        dc_type = "Archival Information Collection"

    if not form.is_valid():
        # Nothing submitted, or the submission has errors: show the form.
        jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
        name = utils.get_directory_name_from_job(jobs[0])
        return render(request, 'ingest/metadata_edit.html', locals())

    # Save the submitted values, then set the type chosen above.
    dc = form.save()
    dc.type = dc_type
    dc.save()
    return redirect('components.ingest.views.ingest_metadata_list', uuid)
Esempio n. 8
0
def aic_metadata_add(request, uuid):
    """Collect Dublin Core metadata for an AIC and hand it to processing.

    On a valid submission the metadata is saved with its ``type`` set to
    "Archival Information Collection", the SIP's ``currentpath`` is updated,
    and the directory ``<shared>/tmp/<uuid>`` is moved into the
    ``system/createAIC`` folder under the watched directory.

    :param request: the current HTTP request.
    :param uuid: UUID of the SIP that represents the AIC.
    :return: a redirect to ``ingest_index`` after a valid submission,
        otherwise the rendered ``ingest/metadata_edit.html`` page.
    """
    # Names visible on the render path are kept as-is: ``locals()`` is the
    # template context.
    sip_type_id = ingest_sip_metadata_type_id()
    try:
        dc = models.DublinCore.objects.get(metadataappliestotype=sip_type_id,
                                           metadataappliestoidentifier=uuid)
    except models.DublinCore.DoesNotExist:
        dc = models.DublinCore(metadataappliestotype=sip_type_id,
                               metadataappliestoidentifier=uuid)
    else:
        id = dc.id

    form = ingest_forms.AICDublinCoreMetadataForm(request.POST or None,
                                                  instance=dc)
    if not form.is_valid():
        # Nothing submitted, or the submission has errors: show the form.
        name = dc.title or "New AIC"
        aic = True
        return render(request, 'ingest/metadata_edit.html', locals())

    # Save the metadata
    dc = form.save()
    dc.type = "Archival Information Collection"
    dc.save()

    # The directory name is built from the title, or from the identifier
    # when there is no title.
    slug = slugify(dc.title or dc.identifier).replace('-', '_')
    folder = '{name}-{uuid}'.format(name=slug, uuid=uuid)

    shared_root = django_settings.SHARED_DIRECTORY
    staged = os.path.join(shared_root, 'tmp', uuid)
    target = os.path.join(django_settings.WATCH_DIRECTORY, 'system',
                          'createAIC', folder)

    # The database stores the path with the shared directory replaced by a
    # placeholder, plus a trailing slash.
    stored_path = target.replace(shared_root, '%sharedPath%') + '/'
    models.SIP.objects.filter(uuid=uuid).update(currentpath=stored_path)

    # Moving the directory starts the MicroServiceChainLink for the AIC.
    shutil.move(staged, target)
    return redirect('ingest_index')
def parse_dc(job, sip_uuid, root):
    """
    Parse SIP-level DublinCore metadata into the DublinCore table.

    Deletes existing entries associated with this SIP.

    Of the DC dmdSecs referenced by the ``objects`` directory div, the one
    sorting last by its ``CREATED`` attribute is used. ``None`` is returned
    (and nothing is saved) when the METS has no DC dmdSec, no ``objects``
    div, no ``DMDID`` on that div, no referenced DC dmdSec, or no
    ``dcterms:dublincore`` element inside the selected dmdSec.

    :param job: object whose ``pyprint`` method receives progress messages.
    :param str sip_uuid: UUID of the SIP to parse the metadata for.
    :param root: root Element of the METS file.
    :return: DublinCore DB object, or None
    """
    # Delete existing DC
    models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid, metadataappliestotype_id=MD_TYPE_SIP_ID
    ).delete()
    # Parse DC
    dmds = root.xpath(
        'mets:dmdSec/mets:mdWrap[@MDTYPE="DC"]/parent::*', namespaces=ns.NSMAP
    )
    if not dmds:
        return None

    # Maps the local name of a DC element to the model attribute it fills.
    DC_TERMS_MATCHING = {
        "title": "title",
        "creator": "creator",
        "subject": "subject",
        "description": "description",
        "publisher": "publisher",
        "contributor": "contributor",
        "date": "date",
        "type": "type",
        "format": "format",
        "identifier": "identifier",
        "source": "source",
        "relation": "relation",
        "language": "language",
        "coverage": "coverage",
        "rights": "rights",
        "isPartOf": "is_part_of",
    }

    # Only want SIP DC, not file DC
    div = root.find(
        'mets:structMap/mets:div/mets:div[@TYPE="Directory"][@LABEL="objects"]',
        namespaces=ns.NSMAP,
    )
    if div is None:
        # No objects directory in the structMap, hence no SIP-level DC.
        return None
    dmdids = div.get("DMDID")
    # No SIP DC
    if dmdids is None:
        return None
    dmdids = dmdids.split()

    # Want most recently updated.  A dmdSec without CREATED sorts first;
    # the empty-string fallback avoids comparing None with str.
    dmds = sorted(dmds, key=lambda e: e.get("CREATED") or "")
    dc_xml = None
    for dmd in reversed(dmds):
        if dmd.get("ID") in dmdids:
            dc_xml = dmd.find(
                "mets:mdWrap/mets:xmlData/dcterms:dublincore", namespaces=ns.NSMAP
            )
            break
    if dc_xml is None:
        # Either no DC dmdSec is referenced by the objects div, or the
        # selected one has no dublincore payload.
        return None

    dc_model = models.DublinCore(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=MD_TYPE_SIP_ID,
        status=models.METADATA_STATUS_REINGEST,
    )
    job.pyprint("Dublin Core:")
    for elem in dc_xml:
        tag = elem.tag.replace(ns.dctermsBNS, "", 1).replace(ns.dcBNS, "", 1)
        job.pyprint(tag, elem.text)
        field = DC_TERMS_MATCHING.get(tag)
        if field is None:
            # Term without a matching model attribute: report and skip it
            # instead of aborting the whole parse with a KeyError.
            job.pyprint("Skipping unsupported Dublin Core term:", tag)
            continue
        if elem.text is not None:
            # Empty elements leave the model's default value untouched.
            setattr(dc_model, field, elem.text)
    dc_model.save()
    return dc_model