def load_dict(meta_dict):
    """Wrap a raw metadata dict in the class named by its "rec_class" key.

    :param meta_dict: mapping that may hold a "rec_class" entry.
    :return: an instance of the class matching meta_dict["rec_class"],
        built from meta_dict. A Common instance is returned when the key is
        missing, and also (after logging a warning) when its value is not
        one of the names handled below.
    """
    if "rec_class" not in meta_dict:
        return Common(meta_dict)

    rec_class = meta_dict["rec_class"]
    if rec_class == "Document":
        return Document(meta_dict)
    elif rec_class == "Person":
        return Person(meta_dict)
    elif rec_class == "Orgunit":
        return Orgunit(meta_dict)
    elif rec_class == "Project":
        return Project(meta_dict)
    elif rec_class == "Event":
        return Event(meta_dict)
    elif rec_class == "Family":
        return Family(meta_dict)
    elif rec_class == "Field":
        return Field(meta_dict)
    elif rec_class == "Resource":
        return Resource(meta_dict)
    elif rec_class == "Target":
        return Target(meta_dict)
    elif rec_class == "Type":
        return Type(meta_dict)
    elif rec_class == "Collection":
        return Collection(meta_dict)

    # Unknown class name: dump the record for debugging, warn, and fall back
    # to the generic wrapper.
    logging.debug(jsonbson.dumps_bson(meta_dict))
    # The placeholder must be the digit zero. With the letter "O" str.format
    # looks for a keyword argument named O and raises KeyError, so the
    # fallback below was never reached.
    logging.warning("Unknown rec_class: {0}".format(rec_class))
    return Common(meta_dict)
def get_rml_teachings(rml):
    """Convert the rml:teaching children of *rml* into a "teachings" list.

    Mapping: teaching -> teachings

    :param rml: element whose rml:teaching children are read with findall().
    :return: a dict holding the key "teachings" (one dict per teaching
        element found), or an empty dict when there is no teaching element.
    """
    result = {}
    rml_teachings = rml.findall(xmletree.prefixtag("rml", "teaching"))
    if rml_teachings is not None:
        teachings = []
        for rml_teaching in rml_teachings:
            if rml_teaching is None:
                continue
            teaching = {}

            # dateBegin -> date_begin
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "dateBegin", "date_begin"))
            # dateEnd -> date_end
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "dateEnd", "date_end"))
            # description -> descriptions[i]
            teaching.update(get_rml_textlangs_and_set_key(rml_teaching, "description", "descriptions"))
            # level -> level
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "level", "level"))
            # title -> title
            teaching.update(get_rml_element_text_and_set_key(rml_teaching, "title", "title"))

            # creators
            # name -> creators[0].agent.name
            name = get_rml_element_text(rml_teaching, "name")
            creator = creator_service.formatted_name_to_creator(name, constants.REC_CLASS_ORGUNIT, "dgg")
            if creator is None:
                # No usable name: build an empty orgunit creator by hand.
                creator = Creator()
                creator["agent"] = Orgunit()
                # "roles" is a list, as in get_rml_degrees and in the
                # creators built by formatted_name_to_creator; a bare string
                # here made the two code paths produce different shapes.
                creator["roles"] = ["dgg"]

            # identifiers -> creators[0].agent.rec_id or creators[0].agent.identifiers
            agent = creator["agent"]
            agent.update(get_rml_identifiers(rml_teaching))
            # Only keep the creator when its agent carries some identity.
            if "name" in agent or "rec_id" in agent or "identifiers" in agent:
                teaching["creators"] = [creator]

            # The dict always exists at this point, so it is always appended
            # (even when empty), exactly as before.
            teachings.append(teaching)

        if teachings:
            result["teachings"] = teachings
    return result
def get_rml_degrees(rml):
    """Collect the rml:degree children of *rml* under the "degrees" key.

    Mapping: degree -> degrees

    Returns a dict with a "degrees" list (one dict per degree element), or
    an empty dict when no degree element is found.
    """
    output = {}
    degree_nodes = rml.findall(xmletree.prefixtag("rml", "degree"))
    if degree_nodes is None:
        return output

    collected = []
    for node in degree_nodes:
        if node is None:
            continue

        entry = {}
        # dateBegin -> date_begin, dateEnd -> date_end
        entry.update(get_rml_element_text_and_set_key(node, "dateBegin", "date_begin"))
        entry.update(get_rml_element_text_and_set_key(node, "dateEnd", "date_end"))
        # description -> descriptions
        entry.update(get_rml_textlangs_and_set_key(node, "description", "descriptions"))
        # level -> level, title -> title
        entry.update(get_rml_element_text_and_set_key(node, "level", "level"))
        entry.update(get_rml_element_text_and_set_key(node, "title", "title"))

        # name -> creators[0].agent.name
        orgunit_name = get_rml_element_text(node, "name")
        degree_creator = creator_service.formatted_name_to_creator(
            orgunit_name, constants.REC_CLASS_ORGUNIT, "dgg")
        if degree_creator is None:
            # No creator could be built from the name: start from a blank one.
            degree_creator = Creator()
            degree_creator["agent"] = Orgunit()
            degree_creator["roles"] = ["dgg"]

        # identifiers -> creators[0].agent.rec_id or creators[0].agent.identifiers
        agent = degree_creator["agent"]
        agent.update(get_rml_identifiers(node))
        if any(key in agent for key in ("name", "rec_id", "identifiers")):
            entry["creators"] = [degree_creator]

        # Every visited element contributes an entry, even an empty one.
        collected.append(entry)

    if collected:
        output["degrees"] = collected
    return output
def get_rml_call(rml):
    """Translate the rml:call child of *rml* into a Call stored under "call".

    Mapping: call -> call

    Returns {"call": <Call>} when a call element exists and the resulting
    Call is non-empty, otherwise an empty dict.
    """
    call_node = rml.find(xmletree.prefixtag("rml", "call"))
    if call_node is None:
        return {}

    call = Call()

    # funding -> funding
    funding_node = call_node.find(xmletree.prefixtag("rml", "funding"))
    if funding_node is not None:
        # name -> agent.name
        funder_name = get_rml_element_text(funding_node, "name")
        funding = creator_service.formatted_name_to_creator(
            funder_name, constants.REC_CLASS_ORGUNIT, None)
        if funding is None:
            funding = Creator()
            funding["agent"] = Orgunit()

        # identifier -> agent.rec_id & agent.identifiers
        funding["agent"].update(get_rml_identifiers(funding_node))
        # programme -> programme, scheme -> scheme
        for tag in ("programme", "scheme"):
            funding.update(get_rml_element_text_and_set_key(funding_node, tag, tag))
        # contribution -> contribution
        funding.update(get_rml_money_and_set_key(funding_node, "contribution", "contribution"))

        if funding:
            call["funding"] = funding

    # identifier -> rec_id, title -> title, year -> date_issued
    text_mappings = (
        ("identifier", "rec_id"),
        ("title", "title"),
        ("year", "date_issued"),
    )
    for tag, key in text_mappings:
        call.update(get_rml_element_text_and_set_key(call_node, tag, key))

    if call:
        return {"call": call}
    return {}
def create_affiliation(rec_id, name, role=None, date_begin=None, date_end=None, preferred=False, descriptions=None):
    """Build an affiliation dict pointing at an Orgunit agent.

    Only truthy arguments are stored. The "agent" entry is always present;
    it is an Orgunit that receives "rec_id" and "name" when those are truthy.
    """
    # Listed in the order the keys must be inserted.
    optional_fields = (
        ("date_begin", date_begin),
        ("date_end", date_end),
        ("descriptions", descriptions),
        ("preferred", preferred),
        ("role", role),
    )
    affiliation = {}
    for key, value in optional_fields:
        if value:
            affiliation[key] = value

    # agent
    orgunit = Orgunit()
    for key, value in (("rec_id", rec_id), ("name", name)):
        if value:
            orgunit[key] = value
    affiliation["agent"] = orgunit

    return affiliation
def _extract_name_affix(affixes, name_family, name_given):
    """Strip each known affix from the family / given name parts.

    For every entry of *affixes* (compared in lower case) the affix is
    removed from the start of the family name, and from the end or the start
    of the given name; a given name equal to the affix becomes None.

    Returns a tuple (affix, name_family, name_given) where affix is the last
    affix text removed, or "" when nothing matched.
    """
    found = ""
    for affix in affixes:
        size = len(affix)
        if name_family and name_family.lower().startswith(affix + " "):
            found = name_family[:size]
            name_family = name_family[size:].strip()
        if name_given:
            if name_given.lower().endswith(" " + affix):
                found = name_given[-size:]
                name_given = name_given[:-size].strip()
            if name_given.lower().startswith(affix + " "):
                found = name_given[:size]
                name_given = name_given[size:].strip()
            if name_given.lower() == affix:
                found = name_given
                name_given = None
    return found, name_family, name_given


def formatted_name_to_creator(formatted_name, rec_class, role):
    """Build a Creator from a free-text formatted name.

    :param formatted_name: name text; for persons and families it may look
        like "Family, Given", "Given Family", "name (birth-death)",
        "name/affiliation" or end with ", Jr.".
    :param rec_class: one of constants.REC_CLASS_EVENT / FAMILY / ORGUNIT /
        PERSON. Any other value (including None) triggers detection from
        creator_event_terms and creator_orgunit_terms, defaulting to person.
    :param role: when truthy, stored as the single item of creator["roles"].
    :return: a Creator whose "agent" is an Event, Orgunit, Person or Family,
        or None when formatted_name is empty or only whitespace.
    """
    if not formatted_name:
        return None
    formatted_name = formatted_name.strip()
    if not formatted_name:
        # Whitespace only: nothing to build an agent from.
        return None

    # rec_class determination
    valid_classes = (
        constants.REC_CLASS_EVENT,
        constants.REC_CLASS_FAMILY,
        constants.REC_CLASS_ORGUNIT,
        constants.REC_CLASS_PERSON,
    )
    if rec_class not in valid_classes:
        lowered_name = formatted_name.lower()
        if any(term in lowered_name for term in creator_event_terms):
            rec_class = constants.REC_CLASS_EVENT
        # An orgunit term takes precedence over an event term.
        if any(term in lowered_name for term in creator_orgunit_terms):
            rec_class = constants.REC_CLASS_ORGUNIT
        # Default to person for every undetermined class, not only None;
        # otherwise an unrecognised rec_class yields a creator with no agent.
        if rec_class not in valid_classes:
            rec_class = constants.REC_CLASS_PERSON

    creator = Creator()
    if role:
        creator["roles"] = [role]

    if rec_class == constants.REC_CLASS_EVENT:
        event = Event()
        event["title"] = formatted_name
        creator["agent"] = event
        return creator

    if rec_class == constants.REC_CLASS_ORGUNIT:
        orgunit = Orgunit()
        orgunit["name"] = formatted_name
        creator["agent"] = orgunit
        return creator

    # From here on the class is "Person" or "Family".
    name_given = ""
    name_middle = ""
    name_family = ""
    date_birth = ""
    date_death = ""
    affiliation_name = ""

    # May be like: name (date_birth-date_death)
    parenthesis_index = formatted_name.rfind("(")
    if parenthesis_index != -1:
        # Stop at the matching ")" rather than assuming it is the last
        # character of the string.
        closing_index = formatted_name.find(")", parenthesis_index)
        if closing_index == -1:
            closing_index = len(formatted_name)
        dates_part = formatted_name[parenthesis_index + 1:closing_index].strip()
        date_birth = dates_part[:4]
        date_death = dates_part[5:]
        if date_death == "....":
            date_death = ""
        formatted_name = formatted_name[:parenthesis_index].strip()

    # Like: name/affiliation
    slash_index = formatted_name.find("/")
    if slash_index != -1:
        affiliation_name = formatted_name[slash_index + 1:].strip()
        formatted_name = formatted_name[:slash_index].strip()

    # Like: "Paul B. Harvey, Jr."
    commaspacejrdot_index = formatted_name.rfind(", Jr.")
    if commaspacejrdot_index != -1:
        formatted_name = formatted_name[:commaspacejrdot_index].strip()
        name_middle = "Jr."

    # Is it formatted like "Family, Given" or "Given Family"?
    comma_index = formatted_name.find(",")
    if comma_index == -1:
        space_index = formatted_name.rfind(" ")
        if space_index != -1:
            # Like: Given Family
            name_given = formatted_name[:space_index].strip()
            name_family = formatted_name[space_index + 1:].strip()
        else:
            # Like: Family
            name_family = formatted_name.strip()
    else:
        # Like: Family, Given
        name_family = formatted_name[:comma_index].strip()
        name_given = formatted_name[comma_index + 1:].strip()

    # Manage the terms of address, then the particule. Only leading/trailing
    # occurrences are removed, so a particule inside the name
    # (like: Viveiros de Castro, Eduardo) is left alone.
    name_terms_of_address, name_family, name_given = _extract_name_affix(
        creator_person_terms_of_address, name_family, name_given)
    name_prefix, name_family, name_given = _extract_name_affix(
        creator_particule, name_family, name_given)

    if rec_class == constants.REC_CLASS_PERSON:
        person = Person()
        person.set_key_if_not_none("name_family", name_family)
        person.set_key_if_not_none("name_given", name_given)
        person.set_key_if_not_none("name_middle", name_middle)
        person.set_key_if_not_none("name_terms_of_address", name_terms_of_address)
        person.set_key_if_not_none("name_prefix", name_prefix)
        person.set_key_if_not_none("date_birth", date_birth)
        person.set_key_if_not_none("date_death", date_death)
        creator["agent"] = person

        if affiliation_name:
            # todo manage as an object
            affiliation = Orgunit()
            affiliation["name"] = affiliation_name
            creator["affiliation"] = affiliation
    elif rec_class == constants.REC_CLASS_FAMILY:
        family = Family()
        family.set_key_if_not_none("name_family", name_family)
        creator["agent"] = family

    return creator
def org_laboratory_to_metajson(org_laboratory):
    """Return a new, empty Orgunit.

    The *org_laboratory* argument is accepted but not read.
    """
    return Orgunit()
def rml_orgunit_to_metajson(rml_orgunit, source, rec_id_prefix):
    """Convert an rml orgUnit element into an Orgunit.

    Mapping: orgUnit -> orgunit

    *source*, when truthy, is stored as "rec_source". Each helper listed
    below is then called on *rml_orgunit* and the dict it returns is merged
    into the Orgunit, in the listed order. *rec_id_prefix* is not read.
    """
    record = Orgunit()

    # source
    if source:
        record["rec_source"] = source

    text = get_rml_element_text_and_set_key
    textlangs = get_rml_textlangs_and_set_key

    # (helper, extra positional arguments) in the order they are applied
    conversions = (
        (text, ("acronym", "acronym")),                           # acronym -> acronym
        (get_rml_addresses, ()),                                  # address -> addresses
        (get_rml_affiliations, ()),                               # affiliation -> affiliations
        (textlangs, ("award", "awards")),                         # award -> awards
        (get_rml_self_archiving_policy, ()),                      # ckbData -> self_archiving_policy
        (text, ("dateOfDissolution", "date_dissolution")),        # dateOfDissolution -> date_dissolution
        (text, ("dateOfFoundation", "date_foundation")),          # dateOfFoundation -> date_foundation
        (textlangs, ("description", "descriptions")),             # description -> descriptions
        (get_rml_emails, ()),                                     # email -> emails
        (get_rml_headcounts, ()),                                 # headcount -> headcounts
        (get_rml_identifiers, ()),                                # identifier -> identifiers
        (get_rml_images, ("logo",)),                              # image -> resources[0]
        (text, ("name", "name")),                                 # name -> name
        (textlangs, ("nameAlternative", "name_alternatives")),    # nameAlternative -> name_alternatives
        (text, ("nationality", "nationality")),                   # nationality -> nationality
        (textlangs, ("note", "notes")),                           # note -> notes
        (textlangs, ("olDescription", "descriptions_short")),     # olDescription -> descriptions_short
        (get_rml_ongoing_researches, ()),                         # ongoingResearch -> ongoing_researches
        (get_rml_phones, ()),                                     # phone -> phones
        (get_rml_research_coverages, ()),                         # researchCoverage -> research_coverages
        (textlangs, ("skill", "skills")),                         # skill -> skills
        (get_rml_turnovers, ()),                                  # turnover -> turnovers
        (xmletree.get_element_attribute_and_set_key, ("type", "rec_type")),  # @type -> rec_type
        (get_rml_uris, ()),                                       # uri -> urls
    )
    for helper, extra_args in conversions:
        record.update(helper(rml_orgunit, *extra_args))

    return record