def add_series(mods, client):
    """Build a minimal MODS record for a series and register it as a periodical.

    A bare ``mods`` element containing only ``titleInfo/title`` is
    constructed and handed to ``mods2ds.get_or_add_periodical``.

    Args:
        mods: Despite the name, this is the element whose ``.text`` supplies
            the series title (``insert_mods`` passes the series ``title``
            element found under ``relatedItem[@type='series']``).
        client: Mongo Client

    Returns:
        Whatever ``mods2ds.get_or_add_periodical`` returns; the caller in
        this file treats it as the series identifier.
    """
    namespace = mods2ds.MODS_NS
    series_mods = etree.Element("{{{}}}mods".format(namespace))
    title_info = etree.Element("{{{}}}titleInfo".format(namespace))
    title = etree.Element("{{{}}}title".format(namespace))
    title.text = mods.text
    title_info.append(title)
    series_mods.append(title_info)
    return mods2ds.get_or_add_periodical(series_mods, client, RECORD_CONSTANTS)
def insert_mods(mods_xml, client):
    """Inserts a MODS XML datastream to MongoDB schema_org and bibframe
    collections.

    The record type is guessed first from the MODS ``genre`` element
    (top-level, then ``subject/genre``) and, failing that, from
    ``typeOfResource``. When nothing matches, a generic CreativeWork is
    inserted.

    Args:
        mods_xml: Raw MODS XML
        client: Mongo Client

    Returns:
        The identifier returned by the matching ``mods2ds`` helper, or the
        result of ``client.schema_org.CreativeWork.insert`` for the generic
        fallback.

    Raises:
        Nothing is caught here; any error raised by ``etree.XML`` or the
        called helpers propagates to the caller.
    """
    mods = etree.XML(mods_xml)
    genre = mods.find("{{{0}}}genre".format(mods2ds.MODS_NS))
    if genre is None:
        # Try genre subject
        genre = mods.find(
            "{{{0}}}subject/{{{0}}}genre".format(mods2ds.MODS_NS))
    if genre is not None and genre.text is not None:
        # Genre matching is case-insensitive; lower-case once and reuse.
        genre_text = genre.text.lower()
        if genre_text in ('audio recording',
                          'interview',
                          'personal narratives'):
            return mods2ds.get_or_add_audio(mods, client, RECORD_CONSTANTS)
        if genre_text in ('newspaper', 'periodical'):
            return mods2ds.get_or_add_periodical(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith('history'):
            return mods2ds.get_or_add_article(mods, client, RECORD_CONSTANTS)
        if genre_text.startswith(('photo', 'pict')):
            return mods2ds.get_or_add_photograph(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith(('thes', 'essay')):
            return mods2ds.add_thesis(mods, client, RECORD_CONSTANTS)
        if genre_text.startswith('videorecord'):
            return mods2ds.get_or_add_video(mods, client, RECORD_CONSTANTS)
    # Next try using type_of_resource value to guess type
    type_of_resource = mods.find(
        "{{{0}}}typeOfResource".format(mods2ds.MODS_NS))
    if type_of_resource is not None and type_of_resource.text is not None:
        # Unlike genre, typeOfResource is compared case-sensitively.
        resource_type = type_of_resource.text
        if resource_type.startswith('sound'):
            return mods2ds.get_or_add_audio(mods, client, RECORD_CONSTANTS)
        if resource_type.startswith('still image'):
            return mods2ds.get_or_add_photograph(
                mods, client, RECORD_CONSTANTS)
        if resource_type.startswith("text"):
            series = mods.find(
                "{{{0}}}relatedItem[@type='series']/{{{0}}}titleInfo/{{{0}}}title".format(
                    mods2ds.MODS_NS))
            # Only text records that name a series become articles here;
            # text without a series falls through to the generic fallback.
            if series is not None and series.text is not None:
                series_id = add_series(series, client)
                article_id = mods2ds.get_or_add_article(
                    mods, client, RECORD_CONSTANTS)
                # NOTE(review): Collection.update/insert look like the legacy
                # PyMongo API -- confirm against the pinned driver version.
                client.schema_org.CreativeWork.update(
                    {"_id": article_id},
                    {"$set": {"isPartOf": str(series_id)}})
                return article_id
    # No matches, create a generic CreativeWork
    work = CreativeWork(**mods2ds.add_base(mods, client, RECORD_CONSTANTS))
    work_id = client.schema_org.CreativeWork.insert(work.as_dict())
    return work_id