Beispiel #1
0
def createDMDIDSFromCSVParsedMetadataPart2(keys, values):
    """
    Create dmdSec(s) from one row of parsed CSV metadata.

    ``keys`` and ``values`` are read in parallel by index.  Index 0 is not
    processed (the loop starts at 1).  Columns whose key starts with ``dc.``
    or ``dcterms.`` go into a single dmdSec with MDTYPE "DC"; every other
    column goes into a single dmdSec with MDTYPE "OTHER" and OTHERMDTYPE
    "CUSTOM".  Each dmdSec is created the first time a column of its kind is
    met, appended to the module-level ``dmdSecs`` list and numbered from the
    module-level ``globalDmdSecCounter``.

    :param keys: sequence of column names
    :param values: sequence of values, indexed like ``keys``; every value is
        decoded with ``.decode('utf-8')``, so UTF-8 byte strings are expected
    :return: the IDs of the dmdSecs created by this call, separated by single
        spaces (an empty string when none was created)
    :raises UnicodeDecodeError: if a value is not valid UTF-8
    """
    global globalDmdSecCounter
    global dmdSecs
    dc = None
    other = None
    ret = []
    for i in range(1, len(keys)):
        key = keys[i]
        value = values[i]
        if key.startswith(("dc.", "dcterms.")):
            if dc is None:
                # First Dublin Core column: build the DC dmdSec skeleton once
                globalDmdSecCounter += 1
                dmdSec = etree.Element("dmdSec")
                dmdSecs.append(dmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                ret.append(ID)
                dmdSec.set("ID", ID)
                # newChild is defined elsewhere in this module
                mdWrap = newChild(dmdSec, "mdWrap")
                mdWrap.set("MDTYPE", "DC")
                xmlData = newChild(mdWrap, "xmlData")
                dc = etree.Element("dublincore", nsmap={None: dctermsNS})
                dc.set(
                    xsiBNS + "schemaLocation", dctermsNS +
                    " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"
                )
                xmlData.append(dc)
            # The element name is the key without its leading prefix
            prefix = "dc." if key.startswith("dc.") else "dcterms."
            etree.SubElement(dc, key[len(prefix):]).text = value.decode('utf-8')
        else:  # not a dublin core item
            if other is None:
                # First custom column: build the OTHER/CUSTOM dmdSec once
                globalDmdSecCounter += 1
                dmdSec = etree.Element("dmdSec")
                dmdSecs.append(dmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                ret.append(ID)
                dmdSec.set("ID", ID)
                mdWrap = newChild(dmdSec, "mdWrap")
                mdWrap.set("MDTYPE", "OTHER")
                mdWrap.set("OTHERMDTYPE", "CUSTOM")
                other = newChild(mdWrap, "xmlData")
            # Decode exactly like the DC branch does: lxml only accepts
            # unicode or pure-ASCII byte strings as element text, so raw
            # non-ASCII UTF-8 bytes would be rejected here.
            etree.SubElement(other,
                             normalizeNonDcElementName(key)).text = value.decode('utf-8')
    return " ".join(ret)
def createDMDIDSFromCSVParsedMetadataPart2(keys, values):
    """
    Turn one parsed CSV metadata row into dmdSec element(s).

    The entries of ``keys`` and ``values`` are matched up by index, starting
    at index 1 (index 0 is never read).  Keys beginning with ``dc.`` or
    ``dcterms.`` are gathered in one dmdSec whose mdWrap has MDTYPE "DC";
    all remaining keys are gathered in one dmdSec whose mdWrap has MDTYPE
    "OTHER" and OTHERMDTYPE "CUSTOM".  A dmdSec is only created once a key
    of its kind shows up; it is appended to the module-level ``dmdSecs``
    list and its ID is derived from the module-level
    ``globalDmdSecCounter``, which is incremented.

    :param keys: sequence of column names
    :param values: sequence of values, indexed like ``keys``; each value is
        decoded with ``.decode('utf-8')``, so UTF-8 byte strings are expected
    :return: space-separated IDs of the dmdSecs created by this call
        (empty string if there were none)
    :raises UnicodeDecodeError: if a value is not valid UTF-8
    """
    global globalDmdSecCounter
    global dmdSecs
    dc = None
    other = None
    ret = []
    for i in range(1, len(keys)):
        key = keys[i]
        value = values[i]
        if key.startswith(("dc.", "dcterms.")):
            if dc is None:
                # Lazily create the Dublin Core dmdSec on the first DC key
                globalDmdSecCounter += 1
                dmdSec = etree.Element("dmdSec")
                dmdSecs.append(dmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                ret.append(ID)
                dmdSec.set("ID", ID)
                # newChild is defined elsewhere in this module
                mdWrap = newChild(dmdSec, "mdWrap")
                mdWrap.set("MDTYPE", "DC")
                xmlData = newChild(mdWrap, "xmlData")
                dc = etree.Element("dublincore", nsmap={None: dctermsNS})
                dc.set(xsiBNS + "schemaLocation", dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
                xmlData.append(dc)
            # Drop the leading "dc." / "dcterms." to get the element name
            prefix = "dc." if key.startswith("dc.") else "dcterms."
            etree.SubElement(dc, key[len(prefix):]).text = value.decode('utf-8')
        else:  # not a dublin core item
            if other is None:
                # Lazily create the OTHER/CUSTOM dmdSec on the first such key
                globalDmdSecCounter += 1
                dmdSec = etree.Element("dmdSec")
                dmdSecs.append(dmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                ret.append(ID)
                dmdSec.set("ID", ID)
                mdWrap = newChild(dmdSec, "mdWrap")
                mdWrap.set("MDTYPE", "OTHER")
                mdWrap.set("OTHERMDTYPE", "CUSTOM")
                other = newChild(mdWrap, "xmlData")
            # Same decoding as the DC branch; without it, non-ASCII UTF-8
            # bytes are not accepted by lxml as element text.
            etree.SubElement(other, normalizeNonDcElementName(key)).text = value.decode('utf-8')
    return " ".join(ret)
def createDmdSecsFromCSVParsedMetadata(metadata):
    """
    Create dmdSec(s) from the provided metadata.

    Keys are sorted into up to three dmdSecs, each one created the first
    time a key of its kind is encountered:

    * ``dc.*`` and ``dcterms.*`` keys: mdWrap with MDTYPE "DC"
    * ``pbcore.*`` keys: mdWrap with MDTYPE "PBCore"
    * all other keys: mdWrap with MDTYPE "OTHER" and OTHERMDTYPE "CUSTOM"

    Every dmdSec created is also appended to the module-level ``dmdSecs``
    list, and its ID is taken from the module-level ``globalDmdSecCounter``,
    which is incremented.  Values that are not valid UTF-8 are reported on
    stderr and skipped.

    :param metadata: OrderedDict with the metadata keys and a list of values
    :return: List of dmdSec Elements created
    """
    dc = None
    pbcore = None
    other = None
    ret = []

    def _start_dmd_sec(md_type, other_md_type=None):
        # Create, number and register a new dmdSec; return its xmlData element.
        global globalDmdSecCounter
        globalDmdSecCounter += 1
        ID = "dmdSec_" + str(globalDmdSecCounter)
        dmdSec = etree.Element(ns.metsBNS + "dmdSec", ID=ID)
        dmdSecs.append(dmdSec)
        ret.append(dmdSec)
        mdWrap = etree.SubElement(dmdSec, ns.metsBNS + "mdWrap")
        mdWrap.set("MDTYPE", md_type)
        if other_md_type is not None:
            mdWrap.set("OTHERMDTYPE", other_md_type)
        return etree.SubElement(mdWrap, ns.metsBNS + "xmlData")

    # Archivematica does not support refined Dublin Core, e.g.
    # multitiered terms in the format dc.description.abstract
    # If these terms are encountered, the leading tier is dropped and
    # the element is named after what follows the first dot.
    # e.g., dc.description.abstract is mapped to <dc:abstract>
    refinement_regex = re.compile(r'\w+\.(.+)')

    for key, value in metadata.items():
        if key.startswith(("dc.", "dcterms.")):
            if dc is None:
                xmlData = _start_dmd_sec("DC")
                dc = etree.Element(ns.dctermsBNS + "dublincore", nsmap={"dcterms": ns.dctermsNS, 'dc': ns.dcNS})
                dc.set(ns.xsiBNS + "schemaLocation", ns.dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
                xmlData.append(dc)
            # Strip the prefix and remember which namespace it selected
            if key.startswith("dc."):
                key = key.replace("dc.", "", 1)
                elem_namespace = ns.dcBNS
            else:
                key = key.replace("dcterms.", "", 1)
                elem_namespace = ns.dctermsBNS
            match = refinement_regex.match(key)
            if match:
                key, = match.groups()
            for v in value:
                try:
                    etree.SubElement(dc, elem_namespace + key).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    # sys.stderr.write behaves the same whether or not
                    # print_function is in effect for this module
                    sys.stderr.write("Skipping DC value; not valid UTF-8: {}\n".format(v))
        elif key.startswith("pbcore."):
            if pbcore is None:
                xmlData = _start_dmd_sec("PBCore")
                pbcore = etree.Element(ns.pbcoreBNS + "pbcoreInstantiationDocument", nsmap={'pbcore': ns.pbcoreNS})
                pbcore.set(ns.xsiBNS + "schemaLocation", "http://www.pbcore.org/PBCore/PBCoreNamespace.html http://pbcore.org/xsd/pbcore-2.0.xsd")
                xmlData.append(pbcore)
            # The branch condition already guarantees the "pbcore." prefix
            key = key.replace("pbcore.", "", 1)
            elem_namespace = ns.pbcoreBNS
            match = refinement_regex.match(key)
            if match:
                key, = match.groups()
            for v in value:
                try:
                    etree.SubElement(pbcore, elem_namespace + key).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    sys.stderr.write("Skipping pbcore value; not valid UTF-8: {}\n".format(v))
        else:  # neither a dublin core nor a pbcore item
            if other is None:
                other = _start_dmd_sec("OTHER", "CUSTOM")
            for v in value:
                try:
                    etree.SubElement(other, normalizeNonDcElementName(key)).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    sys.stderr.write("Skipping custom value; not valid UTF-8: {}\n".format(v))
    return ret
Beispiel #4
0
def createDmdSecsFromCSVParsedMetadata(metadata):
    """
    Create dmdSec(s) from the provided metadata.

    Keys starting with ``dc.`` or ``dcterms.`` are collected in one dmdSec
    whose mdWrap has MDTYPE "DC"; all other keys are collected in one dmdSec
    whose mdWrap has MDTYPE "OTHER" and OTHERMDTYPE "CUSTOM".  Each dmdSec
    is created the first time a key of its kind is encountered, appended to
    the module-level ``dmdSecs`` list, and given an ID taken from the
    module-level ``globalDmdSecCounter``, which is incremented.  Values that
    are not valid UTF-8 are reported on stderr and skipped.

    :param metadata: OrderedDict with the metadata keys and a list of values
    :return: List of dmdSec Elements created
    """
    dc = None
    other = None
    ret = []

    def _start_dmd_sec(md_type, other_md_type=None):
        # Create, number and register a new dmdSec; return its xmlData element.
        global globalDmdSecCounter
        globalDmdSecCounter += 1
        ID = "dmdSec_" + str(globalDmdSecCounter)
        dmdSec = etree.Element(ns.metsBNS + "dmdSec", ID=ID)
        dmdSecs.append(dmdSec)
        ret.append(dmdSec)
        mdWrap = etree.SubElement(dmdSec, ns.metsBNS + "mdWrap")
        mdWrap.set("MDTYPE", md_type)
        if other_md_type is not None:
            mdWrap.set("OTHERMDTYPE", other_md_type)
        return etree.SubElement(mdWrap, ns.metsBNS + "xmlData")

    # Archivematica does not support refined Dublin Core, e.g.
    # multitiered terms in the format dc.description.abstract
    # If these terms are encountered, the leading tier is dropped and
    # the element is named after what follows the first dot.
    # e.g., dc.description.abstract is mapped to <dc:abstract>
    refinement_regex = re.compile(r'\w+\.(.+)')

    for key, value in metadata.items():
        if key.startswith(("dc.", "dcterms.")):
            if dc is None:
                xmlData = _start_dmd_sec("DC")
                dc = etree.Element(ns.dctermsBNS + "dublincore", nsmap={"dcterms": ns.dctermsNS, 'dc': ns.dcNS})
                dc.set(ns.xsiBNS + "schemaLocation", ns.dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
                xmlData.append(dc)
            # Strip the prefix and remember which namespace it selected
            if key.startswith("dc."):
                key = key.replace("dc.", "", 1)
                elem_namespace = ns.dcBNS
            else:
                key = key.replace("dcterms.", "", 1)
                elem_namespace = ns.dctermsBNS
            match = refinement_regex.match(key)
            if match:
                key, = match.groups()
            for v in value:
                try:
                    etree.SubElement(dc, elem_namespace + key).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    print("Skipping DC value; not valid UTF-8: {}".format(v), file=sys.stderr)
        else:  # not a dublin core item
            if other is None:
                other = _start_dmd_sec("OTHER", "CUSTOM")
            for v in value:
                try:
                    etree.SubElement(other, normalizeNonDcElementName(key)).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    # This branch handles custom (non-DC) keys, so say so
                    print("Skipping custom value; not valid UTF-8: {}".format(v), file=sys.stderr)
    return ret