Exemple #1
0
def ddi_xmletree_to_metajson(ddi_root, source, rec_id_prefix):
    """ DDI xmletree -> MetaJSON Document

    Build a Document of type DOC_TYPE_DATASETQUALI from a DDI root element.

    ddi_root: root element of the DDI tree; None yields None.
    source: stored under "rec_source" when truthy.
    rec_id_prefix: accepted for interface compatibility; not used here.

    Returns the Document. "title" is only set when the whole
    stdyDscr/citation/titlStmt/titl path is present.
    """
    if ddi_root is None:
        return None

    document = Document()
    document["rec_type"] = constants.DOC_TYPE_DATASETQUALI

    if source:
        document["rec_source"] = source

    # stdyDscr/citation/titlStmt/titl
    # find() returns None for a missing child, so walk the path one level at
    # a time and stop at the first gap instead of calling find() on None.
    ddi_titl = ddi_root
    for tag in ("stdyDscr", "citation", "titlStmt", "titl"):
        ddi_titl = ddi_titl.find(xmletree.prefixtag("ddi", tag))
        if ddi_titl is None:
            break

    if ddi_titl is not None:
        document["title"] = ddi_titl.text

    return document
Exemple #2
0
def get_rml_phones(rml):
    """ phone -> phones

    Returns a dict holding a "phones" list when at least one phone could be
    created, otherwise an empty dict.
    """
    result = {}
    phone_elements = rml.findall(xmletree.prefixtag("rml", "phone"))
    if phone_elements is None:
        return result

    collected = []
    for phone_element in phone_elements:
        if phone_element is None:
            continue

        # attributes: @preferred, @relationType, @type, @visible
        is_preferred = xmletree.get_element_attribute_as_boolean(phone_element, "preferred")
        relation = phone_element.get("relationType")
        kind = phone_element.get("type")
        is_visible = xmletree.get_element_attribute_as_boolean(phone_element, "visible")

        # child element: formatted
        formatted_element = phone_element.find(xmletree.prefixtag("rml", "formatted"))
        formatted_text = xmletree.get_element_text(formatted_element)

        created = metajson_service.create_phone(formatted_text, kind, is_preferred, relation, is_visible)
        if created:
            collected.append(created)

    if collected:
        result["phones"] = collected
    return result
Exemple #3
0
def get_rml_research_coverages(rml):
    """ researchCoverage -> research_coverage_classifications & research_coverage_keywords

    Elements with type="keyword" are grouped by their xml:lang attribute
    (keywords without a language are dropped). All other elements become
    {"term_id": value} entries grouped by their authority attribute, using
    "undetermined" when the attribute is absent.

    Returns a dict containing only the non-empty groups.
    """
    result = {}
    rml_rcs = rml.findall(xmletree.prefixtag("rml", "researchCoverage"))
    if rml_rcs is not None:
        rc_classifications_dict = {}
        rc_keywords = {}
        for rml_rc in rml_rcs:
            # An element without text content has text None; calling strip()
            # on it would raise, so such elements are skipped.
            if rml_rc is None or rml_rc.text is None:
                continue
            value = rml_rc.text.strip()

            if rml_rc.get("type") == "keyword":
                language = rml_rc.get(xmletree.prefixtag("xml", "lang"))
                if language is not None:
                    rc_keywords.setdefault(language, []).append(value)
            else:
                authority = rml_rc.get("authority")
                if authority is None:
                    authority = "undetermined"
                rc_classifications_dict.setdefault(authority, []).append({"term_id": value})

        if rc_classifications_dict:
            result["research_coverage_classifications"] = rc_classifications_dict
        if rc_keywords:
            result["research_coverage_keywords"] = rc_keywords
    return result
def metajson_to_oai_dc_xmletree(document, with_schema_location=True):
    """ MetaJSON Document -> oai_dc xmletree

    Returns a (rec_id, oai_dc root element) tuple.
    """
    rec_id = document["rec_id"]
    xmletree.register_namespaces()

    # oai_dc root
    root = ET.Element(xmletree.prefixtag("oai_dc", "oai_dc"))
    if with_schema_location:
        schema_attribute = xmletree.prefixtag("xsi", "schemaLocation")
        schema_value = constants.xmlns_map["oai_dc"] + " " + constants.xmlns_schema_map["oai_dc"]
        root.set(schema_attribute, schema_value)

    # title
    if "title" in document:
        title_element = ET.SubElement(root, xmletree.prefixtag("dc", "title"))
        title_element.text = document["title"]

    # creators: an entry whose first role maps to the creator role type
    # becomes dc:creator, every other entry becomes dc:contributor
    if "creators" in document and document["creators"]:
        for entry in document["creators"]:
            is_creator = False
            if "roles" in entry and entry["roles"] and entry["roles"][0]:
                first_role = entry["roles"][0]
                is_creator = (
                    first_role in creator_service.role_type
                    and creator_service.role_type[first_role] == creator_service.role_type_creator
                )

            tag = "creator" if is_creator else "contributor"
            name_element = ET.SubElement(root, xmletree.prefixtag("dc", tag))
            name_element.text = entry.formatted_name(metajson.STYLE_FAMILY_COMMA_GIVEN)

    return (rec_id, root)
Exemple #5
0
def get_tei_titles_to_metason(tei_element, doc_language):
    """ tei:title -> title, title_sub, title_translateds, title_abbreviateds

    tei_element: element whose direct tei:title children are read.
    doc_language: language of the document; untyped and "sub" titles in
        this language go to the main record, the others are stored as
        translations keyed by their xml:lang.

    Handling per @type:
      (none)  -> "title" or a translated "title"
      "main"  -> "title"
      "alt"   -> translated "title"
      "sub"   -> "title_sub" or a translated "title_sub"
      "short" -> entry in "title_abbreviateds"
    Titles without text content are ignored.

    Returns a dict with only the keys for which data was found.
    """
    result = {}
    title_translated_dict = {}
    title_abbreviated_list = []

    for tei_title in tei_element.findall(xmletree.prefixtag("tei", "title")):
        # An empty title element has text None; strip() on it would raise.
        if tei_title.text is None:
            continue

        title_type = tei_title.get("type")
        title_text = tei_title.text.strip()
        title_lang = tei_title.get(xmletree.prefixtag("xml", "lang"))

        if title_type is None or title_type == "sub":
            key = "title" if title_type is None else "title_sub"
            if title_lang == doc_language:
                result[key] = title_text
            else:
                # one translation record per language, filled incrementally
                title_translated = title_translated_dict.setdefault(title_lang, {})
                title_translated["language"] = title_lang
                title_translated[key] = title_text

        elif title_type == "main":
            result["title"] = title_text

        elif title_type == "alt":
            title_translated = title_translated_dict.setdefault(title_lang, {})
            title_translated["language"] = title_lang
            title_translated["title"] = title_text

        elif title_type == "short":
            title_abbreviated_list.append({"language": title_lang, "title": title_text})

    if title_translated_dict:
        # list() so the result holds a real list rather than a dict view
        result["title_translateds"] = list(title_translated_dict.values())
    if title_abbreviated_list:
        result["title_abbreviateds"] = title_abbreviated_list
    return result
def get_tei_titles_to_metason(tei_element, doc_language):
    """ tei:title -> title, title_sub, title_translateds, title_abbreviateds

    Reads the direct tei:title children of tei_element. Untyped and "sub"
    titles whose xml:lang equals doc_language are stored on the result
    itself ("title" / "title_sub"); in any other language they are stored
    in a per-language translation record. "main" always sets "title",
    "alt" always goes to the translation record, and "short" is appended
    to the abbreviated titles. Titles without text content are ignored.

    Returns a dict with only the keys for which data was found.
    """
    result = {}
    title_translated_dict = {}
    title_abbreviated_list = []

    def set_translated(language, key, text):
        # Create the per-language translation record on first use, then fill it.
        title_translated = title_translated_dict.setdefault(language, {})
        title_translated["language"] = language
        title_translated[key] = text

    tei_titles = tei_element.findall(xmletree.prefixtag("tei", "title"))

    for tei_title in tei_titles:
        # An empty title element has text None; strip() on it would raise.
        if tei_title.text is None:
            continue

        title_type = tei_title.get("type")
        title_text = tei_title.text.strip()
        title_lang = tei_title.get(xmletree.prefixtag("xml", "lang"))

        if title_type is None:
            if title_lang == doc_language:
                result["title"] = title_text
            else:
                set_translated(title_lang, "title", title_text)

        elif title_type == "main":
            result["title"] = title_text

        elif title_type == "alt":
            set_translated(title_lang, "title", title_text)

        elif title_type == "sub":
            if title_lang == doc_language:
                result["title_sub"] = title_text
            else:
                set_translated(title_lang, "title_sub", title_text)

        elif title_type == "short":
            title_abbreviated_list.append({"language": title_lang, "title": title_text})

    if title_translated_dict:
        # list() so the result holds a real list rather than a dict view
        result["title_translateds"] = list(title_translated_dict.values())
    if title_abbreviated_list:
        result["title_abbreviateds"] = title_abbreviated_list
    return result
def tei_xmletree_to_metajson_list(tei_root, source, rec_id_prefix, only_first_record):
    """  TEI xmletree -> MetaJSON Document list

    Generator yielding one converted document per
    TEI/text/body/listBibl/biblFull element. The laboratories and projects
    declared in TEI/text/back/div are converted first and handed to every
    biblFull conversion.

    tei_root: root element of the TEI tree; None yields nothing.
    source: forwarded to biblfull_xmletree_to_metajson.
    rec_id_prefix, only_first_record: accepted for interface compatibility;
        not used by this function.

    Nothing is yielded when text, body or listBibl is missing.
    """
    if tei_root is None:
        return

    # TEI/text
    tei_text = tei_root.find(xmletree.prefixtag("tei", "text"))
    if tei_text is None:
        return

    # TEI/text/body/listBibl
    tei_body = tei_text.find(xmletree.prefixtag("tei", "body"))
    if tei_body is None:
        return
    tei_body_listbibl = tei_body.find(xmletree.prefixtag("tei", "listBibl"))
    if tei_body_listbibl is None:
        return
    # TEI/text/body/listBibl/biblFull
    tei_body_listbibl_biblfulls = tei_body_listbibl.findall(xmletree.prefixtag("tei", "biblFull"))

    laboratories = []
    projects = []
    # TEI/text/back is optional: without it both lists simply stay empty
    tei_back = tei_text.find(xmletree.prefixtag("tei", "back"))
    if tei_back is not None:
        # TEI/text/back/div
        for tei_back_div in tei_back.findall(xmletree.prefixtag("tei", "div")):
            div_type = tei_back_div.get("type")
            if div_type == "laboratories":
                for org in tei_back_div.findall(xmletree.prefixtag("tei", "org")):
                    laboratories.append(org_laboratory_to_metajson(org))
            elif div_type == "projects":
                # projects go to their own list, not to laboratories
                for org in tei_back_div.findall(xmletree.prefixtag("tei", "org")):
                    projects.append(org_project_to_metajson(org))

    for biblfull in tei_body_listbibl_biblfulls:
        yield biblfull_xmletree_to_metajson(biblfull, laboratories, projects, source)
Exemple #8
0
def get_rml_identifiers(rml):
    """ identifier -> identifiers

    An identifier without @type, or with type "hdl", supplies "rec_id"
    (the last one wins); every other identifier is collected under
    "identifiers".
    """
    result = {}
    identifier_elements = rml.findall(xmletree.prefixtag("rml", "identifier"))
    if identifier_elements is None:
        return result

    collected = []
    record_id = None
    for identifier_element in identifier_elements:
        if identifier_element is None:
            continue

        # @type -> id_type, text -> value
        kind = identifier_element.get("type")
        text = xmletree.get_element_text(identifier_element)

        if kind is not None and kind != "hdl":
            # typed identifier
            candidate = metajson_service.create_identifier(kind, text)
            if candidate is not None:
                collected.append(candidate)
        else:
            # rec_id
            record_id = text

    if collected:
        result["identifiers"] = collected
    if record_id:
        result["rec_id"] = record_id
    return result
Exemple #9
0
def get_rml_emails(rml):
    """ email -> emails

    Returns a dict holding an "emails" list when at least one email could
    be created, otherwise an empty dict.
    """
    result = {}
    email_elements = rml.findall(xmletree.prefixtag("rml", "email"))
    if email_elements is None:
        return result

    collected = []
    for email_element in email_elements:
        if email_element is None:
            continue

        # attributes: @preferred, @relationType, @visible
        is_preferred = xmletree.get_element_attribute_as_boolean(email_element, "preferred")
        relation = email_element.get("relationType")
        is_visible = xmletree.get_element_attribute_as_boolean(email_element, "visible")

        # element text -> value
        address = xmletree.get_element_text(email_element)

        created = metajson_service.create_email(address, is_preferred, relation, is_visible)
        if created:
            collected.append(created)

    if collected:
        result["emails"] = collected
    return result
Exemple #10
0
def get_rml_instant_messages(rml):
    """ instantMessage -> instant_messages

    Returns a dict holding an "instant_messages" list when at least one
    entry could be created, otherwise an empty dict.
    """
    result = {}
    im_elements = rml.findall(xmletree.prefixtag("rml", "instantMessage"))
    if im_elements is None:
        return result

    collected = []
    for im_element in im_elements:
        if im_element is None:
            continue

        # attributes: @preferred, @relationType, @service, @visible
        is_preferred = xmletree.get_element_attribute_as_boolean(im_element, "preferred")
        relation = im_element.get("relationType")
        im_service = im_element.get("service")
        is_visible = xmletree.get_element_attribute_as_boolean(im_element, "visible")

        # element text -> value
        handle = xmletree.get_element_text(im_element)

        created = metajson_service.create_instant_message(handle, im_service, is_preferred, relation, is_visible)
        if created:
            collected.append(created)

    if collected:
        result["instant_messages"] = collected
    return result
Exemple #11
0
def get_rml_language_capabilities(rml):
    """ languageCapability -> language_capabilities

    Returns a dict holding a "language_capabilities" list when at least one
    capability could be created, otherwise an empty dict.
    """
    result = {}
    capability_elements = rml.findall(xmletree.prefixtag("rml", "languageCapability"))
    if capability_elements is None:
        return result

    capabilities = []
    for capability_element in capability_elements:
        if capability_element is None:
            continue

        # language -> language, converted by the language service
        raw_language = get_rml_element_text(capability_element, "language")
        language = language_service.convert_unknown_format_to_rfc5646(raw_language)

        # motherTong -> mother_tong
        mother_tong = get_rml_element_text_as_boolean(capability_element, "motherTong")

        # oralInput, oralOutput, textInput, textOutput -> skill levels
        oral_input, oral_output, text_input, text_output = [
            get_rml_element_text(capability_element, tag)
            for tag in ("oralInput", "oralOutput", "textInput", "textOutput")
        ]

        capability = metajson_service.create_language_capability(
            language, mother_tong, oral_input, oral_output, text_input, text_output)
        if capability is not None:
            capabilities.append(capability)

    if capabilities:
        result["language_capabilities"] = capabilities
    return result
Exemple #12
0
def get_rml_relationships(rml):
    """ relationship -> relationships

    Each relationship element becomes a creator-like record for a person,
    completed with the agent identifiers, the relation type and the
    descriptions.
    """
    result = {}
    relationship_elements = rml.findall(xmletree.prefixtag("rml", "relationship"))
    if relationship_elements is None:
        return result

    collected = []
    for relationship_element in relationship_elements:
        if relationship_element is None:
            continue

        # name -> agent.name
        agent_name = get_rml_element_text(relationship_element, "name")
        entry = creator_service.formatted_name_to_creator(agent_name, constants.REC_CLASS_PERSON, None)
        if entry is None:
            # no usable name: start from an empty person
            entry = {"agent": Person()}

        # identifier -> agent.rec_id & agent.identifiers
        entry["agent"].update(get_rml_identifiers(relationship_element))

        # relationType -> relation_type
        entry.update(get_rml_element_text_and_set_key(relationship_element, "relationType", "relation_type"))

        # description -> descriptions
        entry.update(get_rml_textlangs_and_set_key(relationship_element, "description", "descriptions"))

        collected.append(entry)

    if collected:
        result["relationships"] = collected
    return result
Exemple #13
0
def get_rml_textlangs_as_list(rml, element):
    """ @xml:lang -> language
        text -> value

    Returns a list of {"value": ..., "language": ...} dicts ("language" only
    when the attribute is present), or None when nothing was collected.
    """
    matches = rml.findall(xmletree.prefixtag("rml", element))
    if matches is None:
        return None

    entries = []
    for match in matches:
        if match is None or match.text is None:
            continue
        lang = match.get(xmletree.prefixtag("xml", "lang"))
        entry = {"value": match.text.strip()}
        if lang is not None:
            entry["language"] = lang.strip()
        entries.append(entry)

    return entries if entries else None
def get_mods_textlangs_as_list(rml, element):
    """ @xml:lang -> language
        text -> value

    Same shape as the rml variant, but the children are looked up in the
    "mods" namespace. Returns None when nothing was collected.
    """
    matches = rml.findall(xmletree.prefixtag("mods", element))
    if matches is None:
        return None

    entries = []
    for match in matches:
        if match is None or match.text is None:
            continue
        lang = match.get(xmletree.prefixtag("xml", "lang"))
        entry = {"value": match.text.strip()}
        if lang is not None:
            entry["language"] = lang.strip()
        entries.append(entry)

    return entries if entries else None
Exemple #15
0
def get_rml_call(rml):
    """ call -> call

    Returns a dict holding a "call" entry when the call element exists and
    yields a non-empty Call, otherwise an empty dict.
    """
    result = {}
    call_element = rml.find(xmletree.prefixtag("rml", "call"))
    if call_element is None:
        return result

    call = Call()

    # funding -> funding
    funding_element = call_element.find(xmletree.prefixtag("rml", "funding"))
    if funding_element is not None:
        # name -> agent.name
        funder_name = get_rml_element_text(funding_element, "name")
        funding = creator_service.formatted_name_to_creator(funder_name, constants.REC_CLASS_ORGUNIT, None)
        if funding is None:
            funding = Creator()
            funding["agent"] = Orgunit()

        # identifier -> agent.rec_id & agent.identifiers
        funding["agent"].update(get_rml_identifiers(funding_element))

        # programme -> programme, scheme -> scheme
        for tag in ("programme", "scheme"):
            funding.update(get_rml_element_text_and_set_key(funding_element, tag, tag))

        # contribution -> contribution
        funding.update(get_rml_money_and_set_key(funding_element, "contribution", "contribution"))

        if funding:
            call["funding"] = funding

    # identifier -> rec_id, title -> title, year -> date_issued
    for tag, key in (("identifier", "rec_id"), ("title", "title"), ("year", "date_issued")):
        call.update(get_rml_element_text_and_set_key(call_element, tag, key))

    if call:
        result["call"] = call
    return result
def get_mods_elements_text(rml, element):
    """ mods element (repeated) -> list of texts, or None when there is none """
    found = rml.findall(xmletree.prefixtag("mods", element))
    if found is None:
        return None
    texts = [xmletree.get_element_text(node) for node in found if node is not None]
    return texts if texts else None
Exemple #17
0
def get_rml_elements_text(rml, element):
    """ rml element (repeated) -> list of texts, or None when there is none """
    found = rml.findall(xmletree.prefixtag("rml", element))
    if found is None:
        return None
    texts = [xmletree.get_element_text(node) for node in found if node is not None]
    return texts if texts else None
def ddi_xmletree_to_metajson(ddi_root, source, rec_id_prefix):
    """ DDI xmletree -> MetaJSON Document

    Build a Document of type DOC_TYPE_DATASETQUALI from a DDI root element.

    ddi_root: root element of the DDI tree; None yields None.
    source: stored under "rec_source" when truthy.
    rec_id_prefix: accepted for interface compatibility; not used here.

    Returns the Document. "title" is only set when the whole
    stdyDscr/citation/titlStmt/titl path is present.
    """
    if ddi_root is None:
        return None

    document = Document()

    document["rec_type"] = constants.DOC_TYPE_DATASETQUALI

    if source:
        document["rec_source"] = source

    # stdyDscr/citation/titlStmt/titl
    # Each find() may return None when the child is missing, so every level
    # is checked before descending further.
    ddi_stdydscr = ddi_root.find(xmletree.prefixtag("ddi", "stdyDscr"))
    ddi_citation = None
    ddi_titlstmt = None
    ddi_titl = None
    if ddi_stdydscr is not None:
        ddi_citation = ddi_stdydscr.find(xmletree.prefixtag("ddi", "citation"))
    if ddi_citation is not None:
        ddi_titlstmt = ddi_citation.find(xmletree.prefixtag("ddi", "titlStmt"))
    if ddi_titlstmt is not None:
        ddi_titl = ddi_titlstmt.find(xmletree.prefixtag("ddi", "titl"))
    if ddi_titl is not None:
        document["title"] = ddi_titl.text

    return document
Exemple #19
0
def get_rml_addresses(rml):
    """ address -> addresses

    Returns a dict holding an "addresses" list when at least one address
    could be created, otherwise an empty dict.
    """
    result = {}
    address_elements = rml.findall(xmletree.prefixtag("rml", "address"))
    if address_elements is None:
        return result

    collected = []
    for address_element in address_elements:
        if address_element is None:
            continue

        def child_text(tag, parent=address_element):
            # text of the rml child element named tag
            return xmletree.get_element_text(parent.find(xmletree.prefixtag("rml", tag)))

        # child elements: country, localityCityTown, postCode
        country = child_text("country")
        locality_city_town = child_text("localityCityTown")
        post_code = child_text("postCode")

        # @preferred -> preferred, @relationType -> relation_type
        is_preferred = xmletree.get_element_attribute_as_boolean(address_element, "preferred")
        relation = address_element.get("relationType")

        # street -> street
        street = child_text("street")

        # @visible -> visible
        is_visible = xmletree.get_element_attribute_as_boolean(address_element, "visible")

        # address -> addresses[i]
        created = metajson_service.create_address(
            street, post_code, locality_city_town, country, is_preferred, relation, is_visible)
        if created:
            collected.append(created)

    if collected:
        result["addresses"] = collected
    return result
Exemple #20
0
def get_rml_ongoing_researches(rml):
    """ ongoingResearch -> ongoing_researches

    Collects the stripped description texts of every ongoingResearch
    element, grouped by the xml:lang of the description. Descriptions
    without text or without a language are ignored.
    """
    result = {}
    research_elements = rml.findall(xmletree.prefixtag("rml", "ongoingResearch"))
    if research_elements is None:
        return result

    by_language = {}
    for research_element in research_elements:
        descriptions = research_element.findall(xmletree.prefixtag("rml", "description"))
        if descriptions is None:
            continue
        for description in descriptions:
            if description is None or description.text is None:
                continue
            text = description.text.strip()
            lang = description.get(xmletree.prefixtag("xml", "lang"))
            if lang is None:
                continue
            by_language.setdefault(lang, []).append(text)

    if by_language:
        result["ongoing_researches"] = by_language
    return result
def extract_dmdsecs(mets):
    """ mets:dmdSec -> list of Warpper

    For every dmdSec child of mets, builds a Warpper with:
      rec_id       <- @ID
      rec_id_group <- @GROUPID
      meta_type    <- mdWrap/@MDTYPE
      records      <- convert_xmldata() of every child of mdWrap/xmlData

    A dmdSec without an mdWrap child keeps meta_type None and an empty
    records list instead of raising.

    Returns the list of Warpper, or None when there is no dmdSec.
    """
    #logging.debug("dmdsecs")
    dmdsecs = mets.findall(xmletree.prefixtag("mets", "dmdSec"))
    if not dmdsecs:
        return None

    warppers = []
    for dmdsec in dmdsecs:
        warpper = Warpper()

        warpper['rec_id'] = dmdsec.get("ID")
        warpper['rec_id_group'] = dmdsec.get("GROUPID")
        warpper['meta_type'] = None
        warpper['records'] = []

        # find() returns None when the dmdSec has no mdWrap child
        # (the METS schema also allows mdRef here); nothing to convert then.
        mdwrap = dmdsec.find(xmletree.prefixtag("mets", "mdWrap"))
        if mdwrap is not None:
            warpper['meta_type'] = mdwrap.get("MDTYPE")
            for xmldata in mdwrap.findall(xmletree.prefixtag("mets", "xmlData/*")):
                document = convert_xmldata(xmldata, warpper['meta_type'])
                warpper['records'].append(document)

        warppers.append(warpper)

    return warppers
Exemple #22
0
def extract_dmdsecs(mets):
    """ mets:dmdSec -> list of Warpper

    For every dmdSec child of mets, builds a Warpper with:
      rec_id       <- @ID
      rec_id_group <- @GROUPID
      meta_type    <- mdWrap/@MDTYPE
      records      <- convert_xmldata() of every child of mdWrap/xmlData

    A dmdSec without an mdWrap child keeps meta_type None and an empty
    records list instead of raising.

    Returns the list of Warpper, or None when there is no dmdSec.
    """
    #logging.debug("dmdsecs")
    dmdsecs = mets.findall(xmletree.prefixtag("mets", "dmdSec"))
    if not dmdsecs:
        return None

    warppers = []
    for dmdsec in dmdsecs:
        warpper = Warpper()

        warpper['rec_id'] = dmdsec.get("ID")
        warpper['rec_id_group'] = dmdsec.get("GROUPID")
        warpper['meta_type'] = None
        warpper['records'] = []

        # find() returns None when the dmdSec has no mdWrap child
        # (the METS schema also allows mdRef here); nothing to convert then.
        mdwrap = dmdsec.find(xmletree.prefixtag("mets", "mdWrap"))
        if mdwrap is not None:
            warpper['meta_type'] = mdwrap.get("MDTYPE")
            for xmldata in mdwrap.findall(xmletree.prefixtag("mets", "xmlData/*")):
                document = convert_xmldata(xmldata, warpper['meta_type'])
                warpper['records'].append(document)

        warppers.append(warpper)

    return warppers
Exemple #23
0
def get_rml_images(rml, role):
    """ image -> resources

    Every image element text is turned into a remote resource carrying the
    given role; the created resources are returned under "resources".
    """
    result = {}
    image_elements = rml.findall(xmletree.prefixtag("rml", "image"))
    if image_elements is None:
        return result

    collected = []
    for image_element in image_elements:
        if image_element is None:
            continue
        image_url = xmletree.get_element_text(image_element)
        created = metajson_service.create_resource_remote(image_url, None, role)
        if created is not None:
            collected.append(created)

    if collected:
        result["resources"] = collected
    return result
Exemple #24
0
def get_rml_money_and_set_key(rml, element, key):
    """ element -> key

    Returns {key: money} when the rml child element exists, where money
    holds the element text under "value" plus whatever the currency
    attribute helper contributes; otherwise an empty dict.
    """
    result = {}
    money_element = rml.find(xmletree.prefixtag("rml", element))
    if money_element is None:
        return result

    amount = {}
    # @currency -> currency
    amount.update(xmletree.get_element_attribute_and_set_key(money_element, "currency", "currency"))
    # text -> value
    amount["value"] = xmletree.get_element_text(money_element)

    # "value" was just assigned, so the dict is never empty at this point
    result[key] = amount
    return result
Exemple #25
0
def get_rml_teachings(rml):
    """ teaching -> teachings

    Each teaching element becomes a dict with date_begin, date_end,
    descriptions, level and title (each only when the helper provides it)
    and, when the organisation has a name, rec_id or identifiers, a
    one-element "creators" list with role "dgg".

    Returns a dict holding a "teachings" list, or an empty dict when no
    teaching produced any data.
    """
    result = {}
    rml_teachings = rml.findall(xmletree.prefixtag("rml", "teaching"))
    if rml_teachings is not None:
        teachings = []
        for rml_teaching in rml_teachings:
            if rml_teaching is None:
                continue
            teaching = {}

            # dateBegin -> date_begin
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "dateBegin", "date_begin"))

            # dateEnd -> date_end
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "dateEnd", "date_end"))

            # description -> descriptions[i]
            teaching.update(get_rml_textlangs_and_set_key(rml_teaching, "description", "descriptions"))

            # level -> level
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "level", "level"))

            # title -> title
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "title", "title"))

            # creators
            # name -> creators[0].agent.name
            name = get_rml_element_text(rml_teaching, "name")
            creator = creator_service.formatted_name_to_creator(name, constants.REC_CLASS_ORGUNIT, "dgg")
            if creator is None:
                creator = Creator()
                creator["agent"] = Orgunit()
                # roles is a list, as in get_rml_degrees; a bare string
                # would make roles[0] yield a single character
                creator["roles"] = ["dgg"]

            # identifiers -> creators[0].agent.rec_id or creators[0].agent.identifiers
            creator["agent"].update(get_rml_identifiers(rml_teaching))

            if "name" in creator["agent"] or "rec_id" in creator["agent"] or "identifiers" in creator["agent"]:
                teaching["creators"] = [creator]

            teachings.append(teaching)
        if teachings:
            result["teachings"] = teachings
    return result
Exemple #26
0
def get_rml_degrees(rml):
    """ degree -> degrees

    Each degree element becomes a dict with the dates, descriptions, level
    and title provided by the helpers and, when the granting organisation
    has a name, rec_id or identifiers, a one-element "creators" list.
    """
    result = {}
    degree_elements = rml.findall(xmletree.prefixtag("rml", "degree"))
    if degree_elements is None:
        return result

    collected = []
    for degree_element in degree_elements:
        if degree_element is None:
            continue

        entry = {}

        # dateBegin -> date_begin, dateEnd -> date_end
        entry.update(get_rml_element_text_and_set_key(degree_element, "dateBegin", "date_begin"))
        entry.update(get_rml_element_text_and_set_key(degree_element, "dateEnd", "date_end"))
        # description -> descriptions
        entry.update(get_rml_textlangs_and_set_key(degree_element, "description", "descriptions"))
        # level -> level, title -> title
        for tag in ("level", "title"):
            entry.update(get_rml_element_text_and_set_key(degree_element, tag, tag))

        # creators
        # name -> creators[0].agent.name
        organisation_name = get_rml_element_text(degree_element, "name")
        granter = creator_service.formatted_name_to_creator(organisation_name, constants.REC_CLASS_ORGUNIT, "dgg")
        if granter is None:
            granter = Creator()
            granter["agent"] = Orgunit()
            granter["roles"] = ["dgg"]

        # identifiers -> creators[0].agent.rec_id or creators[0].agent.identifiers
        granter["agent"].update(get_rml_identifiers(degree_element))

        agent = granter["agent"]
        if "name" in agent or "rec_id" in agent or "identifiers" in agent:
            entry["creators"] = [granter]

        collected.append(entry)

    if collected:
        result["degrees"] = collected
    return result
Exemple #27
0
def get_rml_participants(rml):
    """ participant -> creators

    Build one creator per rml:participant child of ``rml`` that has a name
    and return them as ``{"creators": [...]}`` (empty dict when there are
    none).
    """
    creators = []
    for participant in rml.findall(xmletree.prefixtag("rml", "participant")):
        if participant is None:
            continue
        participant_name = get_rml_element_text(participant, "name")
        if not participant_name:
            # A participant without a name cannot be turned into a creator.
            continue
        # @entityType -> record class, title-cased when present
        rec_class = xmletree.get_element_attribute(participant, "entityType")
        if rec_class:
            rec_class = rec_class.title()
        built = creator_service.formatted_name_to_creator(participant_name, rec_class, None)
        if built:
            creators.append(built)
    return {"creators": creators} if creators else {}
Exemple #28
0
def get_rml_turnovers(rml):
    """ turnover -> turnovers

    Map each rml:turnover child of ``rml`` to a dict holding its currency
    and year attributes plus its text under "value", and return them as
    ``{"turnovers": [...]}`` (empty dict when there are none).
    """
    collected = []
    for element in rml.findall(xmletree.prefixtag("rml", "turnover")):
        if element is None:
            continue
        entry = {}
        # @currency -> currency, @year -> year
        for attribute in ("currency", "year"):
            entry.update(xmletree.get_element_attribute_and_set_key(element, attribute, attribute))
        # The "value" key is always written, so every element yields an entry.
        entry["value"] = xmletree.get_element_text(element)
        collected.append(entry)

    if not collected:
        return {}
    return {"turnovers": collected}
Exemple #29
0
def get_rml_uris(rml):
    """ uri -> urls

    Turn each rml:uri child of ``rml`` into a url built by
    ``metajson_service.create_url`` and return the truthy ones as
    ``{"urls": [...]}`` (empty dict when there are none).
    """
    found = []
    for uri_element in rml.findall(xmletree.prefixtag("rml", "uri")):
        if uri_element is None:
            continue
        # @preferred, @relationType and @visible qualify the uri text
        is_preferred = xmletree.get_element_attribute_as_boolean(uri_element, "preferred")
        relation = uri_element.get("relationType")
        is_visible = xmletree.get_element_attribute_as_boolean(uri_element, "visible")
        uri_text = xmletree.get_element_text(uri_element)

        built = metajson_service.create_url(uri_text, is_preferred, relation, None, None, is_visible)
        if built:
            found.append(built)

    return {"urls": found} if found else {}
Exemple #30
0
def get_rml_headcounts(rml):
    """ headcount -> headcounts

    Map each rml:headcount child of ``rml`` to ``{"value": <stripped text>}``
    plus an optional "year" taken from the @year attribute, and return them
    as ``{"headcounts": [...]}`` (empty dict when there are none).
    Elements without any text are skipped.
    """
    result = {}
    headcounts = []
    for rml_headcount in rml.findall(xmletree.prefixtag("rml", "headcount")):
        # value: Element.text is None for an empty element, so it has to be
        # checked before stripping (it used to raise AttributeError).
        text = rml_headcount.text
        if text is None:
            continue
        headcount = {"value": text.strip()}
        # @year -> year
        year = rml_headcount.get("year")
        if year is not None:
            headcount["year"] = year.strip()
        headcounts.append(headcount)
    if headcounts:
        result["headcounts"] = headcounts
    return result
Exemple #31
0
def tei_xmletree_to_metajson_list(tei_root, source, rec_id_prefix,
                                  only_first_record):
    """  TEI xmletree -> MetaJSON Document list

    Generator yielding one document per TEI/text/body/listBibl/biblFull.
    The orgs found in TEI/text/back/div[@type="laboratories"] and
    div[@type="projects"] are converted first and handed to every biblFull
    conversion. Nothing is yielded when ``tei_root`` is None or when the
    text/body/listBibl chain is incomplete.

    NOTE(review): ``rec_id_prefix`` and ``only_first_record`` are not used
    by this function; confirm whether callers expect them to be honored.
    """
    if tei_root is None:
        return

    # TEI/text
    tei_text = tei_root.find(xmletree.prefixtag("tei", "text"))
    if tei_text is None:
        return

    # TEI/text/body
    tei_body = tei_text.find(xmletree.prefixtag("tei", "body"))
    if tei_body is None:
        return
    # TEI/text/body/listBibl
    tei_body_listbibl = tei_body.find(xmletree.prefixtag("tei", "listBibl"))
    if tei_body_listbibl is None:
        return
    # TEI/text/body/listBibl/biblFull
    tei_body_listbibl_biblfulls = tei_body_listbibl.findall(
        xmletree.prefixtag("tei", "biblFull"))

    laboratories = []
    projects = []
    # TEI/text/back (optional: without it no laboratories/projects are known)
    tei_back = tei_text.find(xmletree.prefixtag("tei", "back"))
    if tei_back is not None:
        # TEI/text/back/div
        for tei_back_div in tei_back.findall(xmletree.prefixtag("tei", "div")):
            div_type = tei_back_div.get("type")
            if div_type == "laboratories":
                for org in tei_back_div.findall(xmletree.prefixtag("tei", "org")):
                    laboratories.append(org_laboratory_to_metajson(org))
            elif div_type == "projects":
                # Projects go to their own list; they used to be appended to
                # ``laboratories``, which left ``projects`` always empty.
                for org in tei_back_div.findall(xmletree.prefixtag("tei", "org")):
                    projects.append(org_project_to_metajson(org))

    for biblfull in tei_body_listbibl_biblfulls:
        yield biblfull_xmletree_to_metajson(biblfull, laboratories,
                                            projects, source)
Exemple #32
0
def get_rml_affiliations(rml):
    """ affiliation -> affiliations

    Build one affiliation per rml:affiliation child of ``rml`` through
    ``metajson_service.create_affiliation`` and return the non-None ones as
    ``{"affiliations": [...]}`` (empty dict when there are none).
    """
    collected = []
    for node in rml.findall(xmletree.prefixtag("rml", "affiliation")):
        if node is None:
            continue

        # dateBegin -> date_begin, dateEnd -> date_end
        begin = get_rml_element_text(node, "dateBegin")
        end = get_rml_element_text(node, "dateEnd")

        # description -> descriptions
        descriptions = get_rml_textlangs_as_list(node, "description")

        # identifier -> agent.rec_id (only a non-empty rec_id is kept)
        identifiers = get_rml_identifiers(node)
        rec_id = identifiers["rec_id"] if "rec_id" in identifiers and identifiers["rec_id"] else None

        # name -> agent.name
        agent_name = get_rml_element_text(node, "name")

        # @preferred -> preferred
        is_preferred = xmletree.get_element_attribute_as_boolean(node, "preferred")

        # relationType -> role
        role = get_rml_element_text(node, "relationType")

        built = metajson_service.create_affiliation(rec_id, agent_name, role, begin, end, is_preferred, descriptions)
        if built is not None:
            collected.append(built)

    if not collected:
        return {}
    return {"affiliations": collected}
Exemple #33
0
def openurl_xmletree_to_metajson_list(openurl_response, source, rec_id_prefix,
                                      only_first_record):
    """ OpenURL response xmletree -> MetaJSON Document list

    One Document is built per results/result element. It carries the
    issn/eissn identifiers of the citation and one remote Resource per
    linkGroup that exposes a url of type "journal" or "source".
    Conversion stops after the first result when ``only_first_record`` is
    truthy. Returns an empty list when there is no response or no results.
    """
    documents = []
    if openurl_response is None:
        return documents

    def tag(local_name):
        # Qualified tag name in the "ssopenurl" namespace.
        return xmletree.prefixtag("ssopenurl", local_name)

    def child_text(parent, local_name):
        # Text of the first matching child, or None when the child is absent.
        child = parent.find(tag(local_name))
        return None if child is None else child.text

    # results
    results_node = openurl_response.find(tag("results"))
    if results_node is None:
        return documents

    # result
    for result_node in results_node.findall(tag("result")):
        document = Document()
        if source:
            document["source"] = source

        # citation: issn then eissn
        citation_node = result_node.find(tag("citation"))
        if citation_node is not None:
            for id_type in ("issn", "eissn"):
                id_node = citation_node.find(tag(id_type))
                if id_node is None:
                    continue
                identifier = Identifier()
                identifier["id_type"] = id_type
                identifier["value"] = id_node.text
                document.add_item_to_key(identifier, "identifiers")

        # linkGroups/linkGroup
        linkgroups_node = result_node.find(tag("linkGroups"))
        linkgroup_nodes = []
        if linkgroups_node is not None:
            linkgroup_nodes = linkgroups_node.findall(tag("linkGroup"))

        for linkgroup_node in linkgroup_nodes:
            service_name = institution_name = None
            period_begin = period_end = None

            # holdingData
            holding_node = linkgroup_node.find(tag("holdingData"))
            if holding_node is not None:
                institution_name = child_text(holding_node, "providerName")
                service_name = child_text(holding_node, "databaseName")
                # normalizedData: startDate / endDate
                normalized_node = holding_node.find(tag("normalizedData"))
                if normalized_node is not None:
                    period_begin = child_text(normalized_node, "startDate")
                    period_end = child_text(normalized_node, "endDate")

            # url: the last "journal" or "source" url of the group wins
            url = None
            for url_node in linkgroup_node.findall(tag("url")):
                if url_node.get("type") in ("journal", "source"):
                    url = url_node.text
            if not url:
                continue

            resource = Resource()
            resource["rec_type"] = "ResourceRemote"
            resource["rec_state"] = "published"
            resource["relation_type"] = "eResource"
            resource["version_type"] = "publishedVersion"
            resource["access_rights"] = "closedAccess"
            resource["format_mimetype"] = "text/html"
            resource["url"] = url
            # Optional holding information, only set when non-empty.
            optional_fields = (
                ("service_name", service_name),
                ("institution_name", institution_name),
                ("period_begin", period_begin),
                ("period_end", period_end),
            )
            for key, value in optional_fields:
                if value:
                    resource[key] = value
            document.add_item_to_key(resource, "resources")

        documents.append(document)
        if only_first_record:
            break
    return documents
def didl_xmletree_to_metajson(root_item, source, rec_id_prefix):
    """ DIDL Item xmletree -> MetaJSON Document

    Walk the didl:Item children of ``root_item``. An item typed
    descriptiveMetadata provides the document (converted from its embedded
    MODS record); items typed objectFile each provide one remote resource.
    The resources are attached to the document when both exist.
    Returns the document, or None when no MODS record was found.
    """
    document = None
    resources = []

    for item in root_item.findall(xmletree.prefixtag("didl", "Item")):
        # Collect the rdf:type values and the last dcterms:modified of the item.
        item_types = []
        item_date_modified = None
        for descriptor in item.findall(xmletree.prefixtag("didl", "Descriptor")):
            for statement in descriptor.findall(xmletree.prefixtag("didl", "Statement")):
                type_node = statement.find(xmletree.prefixtag("rdf", "type"))
                if type_node is not None:
                    item_types.append(type_node.text)
                modified_node = statement.find(xmletree.prefixtag("dcterms", "modified"))
                if modified_node is not None:
                    item_date_modified = modified_node.text

        if "info:eu-repo/semantics/descriptiveMetadata" in item_types:
            # metadata: Component/Resource/mods
            component = item.find(xmletree.prefixtag("didl", "Component"))
            if component is None:
                continue
            metadata_node = component.find(xmletree.prefixtag("didl", "Resource"))
            if metadata_node is None:
                continue
            mods = metadata_node.find(xmletree.prefixtag("mods", "mods"))
            if mods is None:
                continue
            document = mods_crosswalk.mods_xmletree_to_metajson(mods, source, rec_id_prefix)
            if item_date_modified:
                document["rec_modified_date"] = item_date_modified

        elif "info:eu-repo/semantics/objectFile" in item_types:
            # resource: the version is derived from the item types
            if "info:eu-repo/semantics/publishedVersion" in item_types:
                relation_version = "publishedVersion"
            elif "info:eu-repo/semantics/authorVersion" in item_types:
                relation_version = "authorVersion"
            else:
                relation_version = None

            # Component/Resource @ref -> url, @mimeType -> format_mimetype
            url = None
            format_mimetype = None
            component = item.find(xmletree.prefixtag("didl", "Component"))
            if component is not None:
                didl_resource = component.find(xmletree.prefixtag("didl", "Resource"))
                if didl_resource is not None:
                    url = didl_resource.get("ref")
                    format_mimetype = didl_resource.get("mimeType")

            resources.append(metajson_service.create_resource_remote(
                url,
                None,  # date_last_accessed
                "publication",  # relation_type
                relation_version,
                "openAccess",  # access_rights
                "published",  # rec_state
                format_mimetype,
                None,  # rec_created_date
                item_date_modified or None,  # rec_modified_date
            ))

    if document and resources:
        document["resources"] = resources
    return document
def didl_xmletree_to_metajson_list(didl_root, source, rec_id_prefix, only_first_record):
    """ DIDL xmletree -> MetaJSON Document list

    Generator yielding the conversion of every didl:Item child of
    ``didl_root``; nothing is yielded when ``didl_root`` is None.
    ``only_first_record`` is accepted but not used here.
    """
    if didl_root is None:
        return
    for didl_item in didl_root.findall(xmletree.prefixtag("didl", "Item")):
        yield didl_xmletree_to_metajson(didl_item, source, rec_id_prefix)
def openurl_xmletree_to_metajson_list(openurl_response, source, rec_id_prefix, only_first_record):
    """ OpenURL response xmletree -> MetaJSON Document list

    Each results/result element becomes one Document holding the citation's
    issn/eissn identifiers and, for every linkGroup with a "journal" or
    "source" url, a remote Resource. Only the first result is converted when
    ``only_first_record`` is truthy. An empty list is returned when the
    response or its results element is missing.
    """
    documents = []
    if openurl_response is None:
        return documents

    def qualified(local_name):
        # Tag name qualified with the "ssopenurl" prefix.
        return xmletree.prefixtag("ssopenurl", local_name)

    def text_of(parent, local_name):
        # Text of the first matching child; None when there is no such child.
        found = parent.find(qualified(local_name))
        if found is None:
            return None
        return found.text

    # results
    openurl_results = openurl_response.find(qualified("results"))
    if openurl_results is None:
        return documents

    # result
    for openurl_result in openurl_results.findall(qualified("result")):
        document = Document()
        if source:
            document["source"] = source

        # citation
        openurl_citation = openurl_result.find(qualified("citation"))
        if openurl_citation is not None:
            # issn, then eissn
            for id_type in ("issn", "eissn"):
                id_element = openurl_citation.find(qualified(id_type))
                if id_element is not None:
                    identifier = Identifier()
                    identifier["id_type"] = id_type
                    identifier["value"] = id_element.text
                    document.add_item_to_key(identifier, "identifiers")

        # linkGroups
        openurl_linkgroups = openurl_result.find(qualified("linkGroups"))
        if openurl_linkgroups is not None:
            # linkGroup
            for openurl_linkgroup in openurl_linkgroups.findall(qualified("linkGroup")):
                holding = {
                    "service_name": None,
                    "institution_name": None,
                    "period_begin": None,
                    "period_end": None,
                }
                # holdingData
                openurl_holdingdata = openurl_linkgroup.find(qualified("holdingData"))
                if openurl_holdingdata is not None:
                    holding["institution_name"] = text_of(openurl_holdingdata, "providerName")
                    holding["service_name"] = text_of(openurl_holdingdata, "databaseName")
                    # normalizedData
                    openurl_normalizeddata = openurl_holdingdata.find(qualified("normalizedData"))
                    if openurl_normalizeddata is not None:
                        holding["period_begin"] = text_of(openurl_normalizeddata, "startDate")
                        holding["period_end"] = text_of(openurl_normalizeddata, "endDate")

                # url: keep the last one typed "journal" or "source"
                url = None
                for openurl_url in openurl_linkgroup.findall(qualified("url")):
                    if openurl_url.get("type") in ("journal", "source"):
                        url = openurl_url.text
                if not url:
                    continue

                resource = Resource()
                resource["rec_type"] = "ResourceRemote"
                resource["rec_state"] = "published"
                resource["relation_type"] = "eResource"
                resource["version_type"] = "publishedVersion"
                resource["access_rights"] = "closedAccess"
                resource["format_mimetype"] = "text/html"
                resource["url"] = url
                # Holding details are copied only when they are non-empty.
                for key in ("service_name", "institution_name", "period_begin", "period_end"):
                    if holding[key]:
                        resource[key] = holding[key]
                document.add_item_to_key(resource, "resources")

        documents.append(document)
        if only_first_record:
            break
    return documents
Exemple #37
0
def didl_xmletree_to_metajson(root_item, source, rec_id_prefix):
    """ DIDL Item xmletree -> MetaJSON Document

    Inspect each didl:Item child of ``root_item``: the item typed
    descriptiveMetadata supplies the document through its embedded MODS
    record, and every item typed objectFile supplies a remote resource.
    Resources are stored on the document when both are present.
    Returns the document, or None when no MODS record was found.
    """
    document = None
    resources = []

    for item in root_item.findall(xmletree.prefixtag("didl", "Item")):
        # item types and modification date, read from Descriptor/Statement
        item_types = []
        item_date_modified = None
        for descriptor in item.findall(xmletree.prefixtag("didl", "Descriptor")):
            for statement in descriptor.findall(xmletree.prefixtag("didl", "Statement")):
                rdf_type = statement.find(xmletree.prefixtag("rdf", "type"))
                if rdf_type is not None:
                    item_types.append(rdf_type.text)
                dcterms_modified = statement.find(xmletree.prefixtag("dcterms", "modified"))
                if dcterms_modified is not None:
                    item_date_modified = dcterms_modified.text

        is_metadata = 'info:eu-repo/semantics/descriptiveMetadata' in item_types
        is_object_file = 'info:eu-repo/semantics/objectFile' in item_types

        if is_metadata:
            # metadata: look for Component/Resource/mods
            mods = None
            component = item.find(xmletree.prefixtag("didl", "Component"))
            if component is not None:
                wrapper = component.find(xmletree.prefixtag("didl", "Resource"))
                if wrapper is not None:
                    mods = wrapper.find(xmletree.prefixtag("mods", "mods"))
            if mods is not None:
                document = mods_crosswalk.mods_xmletree_to_metajson(mods, source, rec_id_prefix)
                if item_date_modified:
                    document["rec_modified_date"] = item_date_modified

        elif is_object_file:
            # resource: published version takes precedence over author version
            relation_version = None
            if 'info:eu-repo/semantics/publishedVersion' in item_types:
                relation_version = "publishedVersion"
            elif 'info:eu-repo/semantics/authorVersion' in item_types:
                relation_version = "authorVersion"

            # An empty modification date is passed on as None.
            rec_modified_date = item_date_modified if item_date_modified else None

            # Component/Resource @ref -> url, @mimeType -> format_mimetype
            url = None
            format_mimetype = None
            component = item.find(xmletree.prefixtag("didl", "Component"))
            if component is not None:
                didl_resource = component.find(xmletree.prefixtag("didl", "Resource"))
                if didl_resource is not None:
                    url = didl_resource.get("ref")
                    format_mimetype = didl_resource.get("mimeType")

            remote = metajson_service.create_resource_remote(
                url,
                None,  # date_last_accessed
                "publication",  # relation_type
                relation_version,
                "openAccess",  # access_rights
                "published",  # rec_state
                format_mimetype,
                None,  # rec_created_date
                rec_modified_date,
            )
            resources.append(remote)

    if document and resources:
        document["resources"] = resources
    return document
Exemple #38
0
def didl_xmletree_to_metajson_list(didl_root, source, rec_id_prefix, only_first_record):
    """ DIDL xmletree -> MetaJSON Document list

    Generator converting each didl:Item child of ``didl_root`` with
    ``didl_xmletree_to_metajson``. Yields nothing for a None root.
    ``only_first_record`` is part of the signature but is not read here.
    """
    if didl_root is None:
        return
    top_level_items = didl_root.findall(xmletree.prefixtag("didl", "Item"))
    for top_level_item in top_level_items:
        yield didl_xmletree_to_metajson(top_level_item, source, rec_id_prefix)
Exemple #39
0
def biblfull_xmletree_to_metajson(biblfull, laboratories, projects, source):
    """ biblFull xmletree -> MetaJSON Document

    Only two parts of the biblFull are mapped so far:
    profileDesc/langUsage/language/@ident -> languages, and
    titleStmt -> titles (via ``get_tei_titles_to_metason``, which receives
    the first language, or None when there is none).
    Languages with a missing or empty @ident are ignored, and a biblFull
    without profileDesc or langUsage simply has no languages.

    ``laboratories``, ``projects`` and ``source`` are accepted but not used
    yet. editionStmt, extent, publicationStmt, seriesStmt, notesStmt,
    sourceDesc, keywords and classCode are not mapped yet.

    Returns None when ``biblfull`` is None, the Document otherwise.
    """
    if biblfull is None:
        return None

    document = Document()

    # titleStmt
    tei_titlestmt = biblfull.find(xmletree.prefixtag("tei", "titleStmt"))

    # profileDesc/langUsage/language: every level is optional
    tei_languages = []
    tei_profiledesc = biblfull.find(xmletree.prefixtag("tei", "profileDesc"))
    if tei_profiledesc is not None:
        tei_langusage = tei_profiledesc.find(
            xmletree.prefixtag("tei", "langUsage"))
        if tei_langusage is not None:
            tei_languages = tei_langusage.findall(
                xmletree.prefixtag("tei", "language"))

    # language
    doc_language = None
    idents = (tei_language.get("ident") for tei_language in tei_languages)
    languages = [ident for ident in idents if ident]
    if languages:
        document["languages"] = languages
        doc_language = languages[0]

    # title
    document.update(get_tei_titles_to_metason(tei_titlestmt, doc_language))

    # NOTE(review): these two calls look like leftover debugging output;
    # they are kept so that the function's side effects do not change.
    metajson_service.pretty_print_document(document)
    metajson_service.print_document(document)
    return document
Exemple #40
0
def get_rml_self_archiving_policy(rml):
    """ ckbData/romeoPublisher -> self_archiving_policy

    Returns {"self_archiving_policy": {...}} when at least one policy field
    could be read from ckbData/romeoPublisher, otherwise an empty dict.
    """
    ckbdata_node = rml.find(xmletree.prefixtag("rml", "ckbData"))
    if ckbdata_node is None:
        return {}

    # romeoPublisher -> .
    romeo_node = ckbdata_node.find(xmletree.prefixtag("rml", "romeoPublisher"))
    if romeo_node is None:
        return {}

    # The publisher itself (alias, homeurl, id, name) is deliberately not
    # repeated inside the policy.
    policy = {}

    # conditions.condition -> conditions[]
    conditions_node = romeo_node.find(xmletree.prefixtag("rml", "conditions"))
    if conditions_node is not None:
        condition_nodes = conditions_node.findall(
            xmletree.prefixtag("rml", "condition"))
        if condition_nodes is not None:
            condition_texts = [
                xmletree.get_element_text(condition_node)
                for condition_node in condition_nodes
            ]
            # keep only the conditions that actually carry some text
            kept_conditions = [text for text in condition_texts if text]
            if kept_conditions:
                policy["conditions"] = kept_conditions

    # copyright -> copyright
    policy.update(
        get_rml_element_text_and_set_key(romeo_node, "copyright", "copyright"))

    # copyrightlinks.copyrightlink -> copyright_urls[]
    links_node = romeo_node.find(xmletree.prefixtag("rml", "copyrightlinks"))
    if links_node is not None:
        link_nodes = links_node.findall(
            xmletree.prefixtag("rml", "copyrightlink"))
        if link_nodes is not None:
            link_urls = []
            for link_node in link_nodes:
                # copyrightlinktext is passed as 4th argument of create_url,
                # copyrightlinkurl as 1st
                link_text = xmletree.get_element_text(
                    link_node.find(
                        xmletree.prefixtag("rml", "copyrightlinktext")))
                link_url = xmletree.get_element_text(
                    link_node.find(
                        xmletree.prefixtag("rml", "copyrightlinkurl")))
                link_urls.append(
                    metajson_service.create_url(
                        link_url, None, None, link_text, None, None))
            if link_urls:
                policy["copyright_urls"] = link_urls

    # paidaccess -> paid_access (set even when no sub-element was found)
    paidaccess_node = romeo_node.find(xmletree.prefixtag("rml", "paidaccess"))
    if paidaccess_node is not None:
        paid = {}
        # paidaccessname -> label, paidaccessurl -> url
        # (paidaccessnotes is not converted)
        for tag, key in (("paidaccessname", "label"), ("paidaccessurl", "url")):
            paid.update(
                get_rml_element_text_and_set_key(paidaccess_node, tag, key))
        policy["paid_access"] = paid

    # postprints -> postprint (set even when no sub-element was found)
    postprints_node = romeo_node.find(xmletree.prefixtag("rml", "postprints"))
    if postprints_node is not None:
        post = {}
        # postarchiving -> possibility
        post.update(get_rml_element_text_and_set_key(
            postprints_node, "postarchiving", "possibility"))
        # postrestrictions -> restrictions
        post.update(get_rml_textlangs_and_set_key(
            postprints_node, "postrestrictions", "restrictions"))
        policy["postprint"] = post

    # preprints -> preprint (set even when no sub-element was found)
    preprints_node = romeo_node.find(xmletree.prefixtag("rml", "preprints"))
    if preprints_node is not None:
        pre = {}
        # prearchiving -> possibility
        pre.update(get_rml_element_text_and_set_key(
            preprints_node, "prearchiving", "possibility"))
        # prerestrictions -> pre_restrictions
        # NOTE(review): postprint stores its restrictions under
        # "restrictions" while this key is "pre_restrictions" - confirm
        # which key the consumers expect before harmonising.
        pre.update(get_rml_textlangs_and_set_key(
            preprints_node, "prerestrictions", "pre_restrictions"))
        policy["preprint"] = pre

    # romeocolour -> romeo_color
    policy.update(
        get_rml_element_text_and_set_key(romeo_node, "romeocolour", "romeo_color"))

    return {"self_archiving_policy": policy} if policy else {}
def get_mods_element_text_as_boolean(rml, element):
    """ mods:<element> text -> xmletree.get_element_text_as_boolean()

    Looks up the first "mods"-prefixed child named ``element`` under ``rml``
    and hands it (or None when find() returns nothing) to
    xmletree.get_element_text_as_boolean, whose result is returned as is.
    """
    return xmletree.get_element_text_as_boolean(
        rml.find(xmletree.prefixtag("mods", element)))