def load_dict(meta_dict):
    """Wrap ``meta_dict`` in the class named by its ``rec_class`` entry.

    Args:
        meta_dict: mapping describing a record; its optional ``rec_class``
            entry selects the class used to wrap it.

    Returns:
        An instance of the class matching ``meta_dict["rec_class"]``, or a
        ``Common`` instance when the entry is absent or names no known class.
    """
    if "rec_class" not in meta_dict:
        return Common(meta_dict)

    factories = {
        "Document": Document,
        "Person": Person,
        "Orgunit": Orgunit,
        "Project": Project,
        "Event": Event,
        "Family": Family,
        "Field": Field,
        "Resource": Resource,
        "Target": Target,
        "Type": Type,
        "Collection": Collection,
    }
    rec_class = meta_dict["rec_class"]
    try:
        factory = factories.get(rec_class)
    except TypeError:
        # An unhashable rec_class can never equal one of the known names.
        factory = None
    if factory is not None:
        return factory(meta_dict)

    logging.debug(jsonbson.dumps_bson(meta_dict))
    # The placeholder used to be "{O}" (capital letter O), which made
    # str.format raise KeyError instead of emitting this warning.
    logging.warning("Unknown rec_class: {0}".format(rec_class))
    return Common(meta_dict)
def get_rml_relationships(rml):
    """relationship -> relationships

    Collect every ``rml:relationship`` child of ``rml`` and return
    ``{"relationships": [...]}``, or an empty dict when there are none.
    """
    relationship_elements = rml.findall(xmletree.prefixtag("rml", "relationship"))
    if relationship_elements is None:
        return {}

    collected = []
    for element in relationship_elements:
        if element is None:
            continue

        # name -> agent.name
        name = get_rml_element_text(element, "name")
        entry = creator_service.formatted_name_to_creator(
            name, constants.REC_CLASS_PERSON, None)
        if entry is None:
            # No usable name: start from an empty person.
            entry = {"agent": Person()}

        # identifier -> agent.rec_id & agent.identifiers
        entry["agent"].update(get_rml_identifiers(element))

        # relationType -> relation_type
        entry.update(
            get_rml_element_text_and_set_key(element, "relationType", "relation_type"))

        # descriptions -> descriptions
        entry.update(
            get_rml_textlangs_and_set_key(element, "description", "descriptions"))

        collected.append(entry)

    return {"relationships": collected} if collected else {}
def convert_creators(sum_doc, sum_authors_key, sum_affiliations_key, creator_type, role):
    """Convert the authors listed in ``sum_doc`` into a list of creators.

    Args:
        sum_doc: mapping holding the author and affiliation lists.
        sum_authors_key: key of the author list in ``sum_doc``.
        sum_affiliations_key: key of the affiliation list in ``sum_doc``.
        creator_type: record class passed to
            ``creator_service.formatted_name_to_creator`` for authors that
            only carry a formatted name.
        role: role stored in each creator's ``roles`` list when truthy.

    Returns:
        A list of creators; empty when ``sum_doc`` has no authors.
    """
    creators = []
    sum_authors = sum_doc.get(sum_authors_key)
    if not sum_authors:
        return creators

    # Affiliations are looked up by an author's "sequence" value, using the
    # 1-based position of the affiliation in its list.
    sum_affiliations = sum_doc.get(sum_affiliations_key) or []
    affiliations_dict = dict(enumerate(sum_affiliations, start=1))

    for author in sum_authors:
        if "surname" in author and "givenname" in author:
            creator = Creator()
            person = Person()
            person.set_key_if_not_none("name_family", author["surname"])
            person.set_key_if_not_none("name_given", author["givenname"])
            creator["agent"] = person
            if role:
                creator["roles"] = [role]
        else:
            formatted_name = ""
            if "fullname" in author:
                formatted_name = author["fullname"]
            elif "name" in author:
                formatted_name = author["name"]
            creator = creator_service.formatted_name_to_creator(
                formatted_name, creator_type, role)

        # formatted_name_to_creator returns None for an empty name, so the
        # affiliation can only be attached when a creator was actually built.
        if (creator is not None
                and "sequence" in author
                and author["sequence"] in affiliations_dict):
            creator["affiliation"] = affiliations_dict[author["sequence"]]
            # TODO: location in the case of DissertationSchool_xml

        if creator:
            creators.append(creator)

    return creators
def _extract_name_affix(name_family, name_given, affixes):
    """Strip the first-listed matching affixes from a family/given name pair.

    Returns ``(found, name_family, name_given)`` where ``found`` is the last
    affix removed ("" when none matched). ``name_given`` becomes None when it
    consisted of the affix alone.
    """
    found = ""
    for affix in affixes:
        if name_family and name_family.lower().startswith(affix + " "):
            found = name_family[:len(affix)]
            name_family = name_family[len(affix):].strip()
        if name_given:
            if name_given.lower().endswith(" " + affix):
                found = name_given[-len(affix):]
                name_given = name_given[:-len(affix)].strip()
            if name_given.lower().startswith(affix + " "):
                found = name_given[:len(affix)]
                name_given = name_given[len(affix):].strip()
            if name_given.lower() == affix:
                found = name_given
                name_given = None
    return found, name_family, name_given


def formatted_name_to_creator(formatted_name, rec_class, role):
    """Build a creator from a free-text formatted name.

    Args:
        formatted_name: the name to parse. For persons and families the
            forms "Family, Given", "Given Family", "name (birth-death)",
            "name/affiliation" and a trailing ", Jr." are recognised.
        rec_class: one of the ``constants.REC_CLASS_*`` values for event,
            family, orgunit or person. Any other value (including None) is
            replaced by a class guessed from keywords in the name, with
            person as the default.
        role: role stored in the creator's ``roles`` list when truthy.

    Returns:
        A ``Creator`` whose ``agent`` is an Event, Orgunit, Person or Family,
        or None when ``formatted_name`` is empty.
    """
    if not formatted_name:
        return None
    formatted_name = formatted_name.strip()

    # rec_class determination
    known_classes = (
        constants.REC_CLASS_EVENT,
        constants.REC_CLASS_FAMILY,
        constants.REC_CLASS_ORGUNIT,
        constants.REC_CLASS_PERSON,
    )
    if rec_class is None or rec_class not in known_classes:
        lowered_name = formatted_name.lower()
        # Default to a person for unrecognised classes too, not only for
        # None; otherwise the creator would be returned without an agent.
        detected_class = constants.REC_CLASS_PERSON
        if any(term in lowered_name for term in creator_event_terms):
            detected_class = constants.REC_CLASS_EVENT
        # An orgunit keyword takes precedence over an event keyword.
        if any(term in lowered_name for term in creator_orgunit_terms):
            detected_class = constants.REC_CLASS_ORGUNIT
        rec_class = detected_class

    creator = Creator()
    if role:
        creator["roles"] = [role]

    if rec_class == constants.REC_CLASS_EVENT:
        event = Event()
        event["title"] = formatted_name
        creator["agent"] = event

    elif rec_class == constants.REC_CLASS_ORGUNIT:
        orgunit = Orgunit()
        orgunit["name"] = formatted_name
        creator["agent"] = orgunit

    elif rec_class in (constants.REC_CLASS_PERSON, constants.REC_CLASS_FAMILY):
        name_middle = ""
        date_birth = ""
        date_death = ""
        affiliation_name = ""

        parenthesis_index = formatted_name.rfind("(")
        if parenthesis_index != -1:
            # may be like: name (date_birth-date_death)
            # Stop at the matching ")" rather than assuming it is the very
            # last character of the string.
            closing_index = formatted_name.find(")", parenthesis_index)
            if closing_index == -1:
                closing_index = len(formatted_name)
            dates_part = formatted_name[parenthesis_index + 1:closing_index].strip()
            date_birth = dates_part[:4]
            date_death = dates_part[5:]
            if date_death == "....":
                date_death = ""
            formatted_name = formatted_name[:parenthesis_index].strip()

        slash_index = formatted_name.find("/")
        if slash_index != -1:
            # like: name/affiliation
            affiliation_name = formatted_name[slash_index + 1:].strip()
            formatted_name = formatted_name[:slash_index].strip()

        commaspacejrdot_index = formatted_name.rfind(", Jr.")
        if commaspacejrdot_index != -1:
            # like "Paul B. Harvey, Jr."
            formatted_name = formatted_name[:commaspacejrdot_index].strip()
            name_middle = "Jr."

        # Is it formatted like "Family, Given" or "Given Family" ?
        if "," in formatted_name:
            family_part, _, given_part = formatted_name.partition(",")
        else:
            # Without any space the whole string is the family name.
            given_part, _, family_part = formatted_name.rpartition(" ")
        name_family = family_part.strip()
        name_given = given_part.strip()

        # manage the terms_of_address and particule
        name_terms_of_address, name_family, name_given = _extract_name_affix(
            name_family, name_given, creator_person_terms_of_address)
        # Be careful with a particule inside the name like:
        # Viveiros de Castro, Eduardo
        name_prefix, name_family, name_given = _extract_name_affix(
            name_family, name_given, creator_particule)

        if rec_class == constants.REC_CLASS_PERSON:
            person = Person()
            person.set_key_if_not_none("name_family", name_family)
            person.set_key_if_not_none("name_given", name_given)
            person.set_key_if_not_none("name_middle", name_middle)
            person.set_key_if_not_none("name_terms_of_address", name_terms_of_address)
            person.set_key_if_not_none("name_prefix", name_prefix)
            person.set_key_if_not_none("date_birth", date_birth)
            person.set_key_if_not_none("date_death", date_death)
            creator["agent"] = person

            if affiliation_name:
                # todo manage as an object
                affiliation = Orgunit()
                affiliation["name"] = affiliation_name
                creator["affiliation"] = affiliation
        else:
            family = Family()
            family.set_key_if_not_none("name_family", name_family)
            creator["agent"] = family

    return creator
def _extract_name_affix(name_family, name_given, affixes):
    """Strip the first-listed matching affixes from a family/given name pair.

    Returns ``(found, name_family, name_given)`` where ``found`` is the last
    affix removed ("" when none matched). ``name_given`` becomes None when it
    consisted of the affix alone.
    """
    found = ""
    for affix in affixes:
        if name_family and name_family.lower().startswith(affix + " "):
            found = name_family[:len(affix)]
            name_family = name_family[len(affix):].strip()
        if name_given:
            if name_given.lower().endswith(" " + affix):
                found = name_given[-len(affix):]
                name_given = name_given[:-len(affix)].strip()
            if name_given.lower().startswith(affix + " "):
                found = name_given[:len(affix)]
                name_given = name_given[len(affix):].strip()
            if name_given.lower() == affix:
                found = name_given
                name_given = None
    return found, name_family, name_given


def formatted_name_to_creator(formatted_name, rec_class, role):
    """Build a creator from a free-text formatted name.

    Args:
        formatted_name: the name to parse. For persons and families the
            forms "Family, Given", "Given Family", "name (birth-death)",
            "name/affiliation" and a trailing ", Jr." are recognised.
        rec_class: one of the ``constants.REC_CLASS_*`` values for event,
            family, orgunit or person. Any other value (including None) is
            replaced by a class guessed from keywords in the name, with
            person as the default.
        role: role stored in the creator's ``roles`` list when truthy.

    Returns:
        A ``Creator`` whose ``agent`` is an Event, Orgunit, Person or Family,
        or None when ``formatted_name`` is empty.
    """
    if not formatted_name:
        return None
    formatted_name = formatted_name.strip()

    # rec_class determination
    known_classes = (
        constants.REC_CLASS_EVENT,
        constants.REC_CLASS_FAMILY,
        constants.REC_CLASS_ORGUNIT,
        constants.REC_CLASS_PERSON,
    )
    if rec_class is None or rec_class not in known_classes:
        lowered_name = formatted_name.lower()
        # Default to a person for unrecognised classes too, not only for
        # None; otherwise the creator would be returned without an agent.
        detected_class = constants.REC_CLASS_PERSON
        if any(term in lowered_name for term in creator_event_terms):
            detected_class = constants.REC_CLASS_EVENT
        # An orgunit keyword takes precedence over an event keyword.
        if any(term in lowered_name for term in creator_orgunit_terms):
            detected_class = constants.REC_CLASS_ORGUNIT
        rec_class = detected_class

    creator = Creator()
    if role:
        creator["roles"] = [role]

    if rec_class == constants.REC_CLASS_EVENT:
        event = Event()
        event["title"] = formatted_name
        creator["agent"] = event

    elif rec_class == constants.REC_CLASS_ORGUNIT:
        orgunit = Orgunit()
        orgunit["name"] = formatted_name
        creator["agent"] = orgunit

    elif rec_class in (constants.REC_CLASS_PERSON, constants.REC_CLASS_FAMILY):
        name_middle = ""
        date_birth = ""
        date_death = ""
        affiliation_name = ""

        parenthesis_index = formatted_name.rfind("(")
        if parenthesis_index != -1:
            # may be like: name (date_birth-date_death)
            # Stop at the matching ")" rather than assuming it is the very
            # last character of the string.
            closing_index = formatted_name.find(")", parenthesis_index)
            if closing_index == -1:
                closing_index = len(formatted_name)
            dates_part = formatted_name[parenthesis_index + 1:closing_index].strip()
            date_birth = dates_part[:4]
            date_death = dates_part[5:]
            if date_death == "....":
                date_death = ""
            formatted_name = formatted_name[:parenthesis_index].strip()

        slash_index = formatted_name.find("/")
        if slash_index != -1:
            # like: name/affiliation
            affiliation_name = formatted_name[slash_index + 1:].strip()
            formatted_name = formatted_name[:slash_index].strip()

        commaspacejrdot_index = formatted_name.rfind(", Jr.")
        if commaspacejrdot_index != -1:
            # like "Paul B. Harvey, Jr."
            formatted_name = formatted_name[:commaspacejrdot_index].strip()
            name_middle = "Jr."

        # Is it formatted like "Family, Given" or "Given Family" ?
        if "," in formatted_name:
            family_part, _, given_part = formatted_name.partition(",")
        else:
            # Without any space the whole string is the family name.
            given_part, _, family_part = formatted_name.rpartition(" ")
        name_family = family_part.strip()
        name_given = given_part.strip()

        # manage the terms_of_address and particule
        name_terms_of_address, name_family, name_given = _extract_name_affix(
            name_family, name_given, creator_person_terms_of_address)
        # Be careful with a particule inside the name like:
        # Viveiros de Castro, Eduardo
        name_prefix, name_family, name_given = _extract_name_affix(
            name_family, name_given, creator_particule)

        if rec_class == constants.REC_CLASS_PERSON:
            person = Person()
            person.set_key_if_not_none("name_family", name_family)
            person.set_key_if_not_none("name_given", name_given)
            person.set_key_if_not_none("name_middle", name_middle)
            person.set_key_if_not_none("name_terms_of_address", name_terms_of_address)
            person.set_key_if_not_none("name_prefix", name_prefix)
            person.set_key_if_not_none("date_birth", date_birth)
            person.set_key_if_not_none("date_death", date_death)
            creator["agent"] = person

            if affiliation_name:
                # todo manage as an object
                affiliation = Orgunit()
                affiliation["name"] = affiliation_name
                creator["affiliation"] = affiliation
        else:
            family = Family()
            family.set_key_if_not_none("name_family", name_family)
            creator["agent"] = family

    return creator
def rml_person_to_metajson(rml_person, source, rec_id_prefix):
    """person -> person

    Build a ``Person`` by merging, in order, the dict returned by each RML
    field extractor applied to ``rml_person``. ``source`` is stored under
    ``rec_source`` when truthy. ``rec_id_prefix`` is accepted but not read
    by this function.
    """
    person = Person()

    # source
    if source:
        person["rec_source"] = source

    def text(tag, key):
        return get_rml_element_text_and_set_key(rml_person, tag, key)

    def textlangs(tag, key):
        return get_rml_textlangs_and_set_key(rml_person, tag, key)

    # The extractors are evaluated and merged in exactly this order.
    fragments = (
        # academicTitle, honorificTitle -> titles
        get_rml_titles(rml_person),
        # address -> addresses
        get_rml_addresses(rml_person),
        # affiliation -> affiliations
        get_rml_affiliations(rml_person),
        # award -> awards
        textlangs("award", "awards"),
        # biography -> biographies
        textlangs("biography", "biographies"),
        # dateOfBirth -> date_birth
        text("dateOfBirth", "date_birth"),
        # dateOfDeath -> date_death
        text("dateOfDeath", "date_death"),
        # degree -> degrees
        get_rml_degrees(rml_person),
        # email -> emails
        get_rml_emails(rml_person),
        # @fictitious -> fictitious
        xmletree.get_element_attribute_as_boolean_and_set_key(
            rml_person, "fictitious", "fictitious"),
        # firstname -> name_given
        text("firstname", "name_given"),
        # identifier -> identifiers & rec_id
        get_rml_identifiers(rml_person),
        # image -> resources[i]
        get_rml_images(rml_person, "picture"),
        # instantMessage -> instant_messages
        get_rml_instant_messages(rml_person),
        # languageCapability -> language_capabilities
        get_rml_language_capabilities(rml_person),
        # lastname -> name_family
        text("lastname", "name_family"),
        # lastnamePrefix -> name_prefix
        text("lastnamePrefix", "name_prefix"),
        # lastnameSuffix -> name_suffix
        text("lastnameSuffix", "name_suffix"),
        # middlename -> name_middle
        text("middlename", "name_middle"),
        # nationality -> nationality
        text("nationality", "nationality"),
        # nickname -> name_nick
        text("nickname", "name_nick"),
        # note -> notes
        textlangs("note", "notes"),
        # olBiography -> biographies_short
        textlangs("olBiography", "biographies_short"),
        # ongoingResearch -> ongoing_researches
        get_rml_ongoing_researches(rml_person),
        # phone -> phones
        get_rml_phones(rml_person),
        # relationship -> relationships
        get_rml_relationships(rml_person),
        # researchCoverage -> research_coverages
        get_rml_research_coverages(rml_person),
        # responsability -> responsabilities
        textlangs("responsability", "responsabilities"),
        # sex -> gender
        text("sex", "gender"),
        # skill -> skills
        textlangs("skill", "skills"),
        # teaching -> teachings
        get_rml_teachings(rml_person),
        # uri -> urls
        get_rml_uris(rml_person),
    )
    for fragment in fragments:
        person.update(fragment)

    return person