def get_or_add_periodical(mods, client, record_constants):
    """Takes a MODS etree and gets or adds a Periodical

    Periodical model uses the proposal at
    http://www.w3.org/community/schemabibex/ to add support for recurring
    resources in the MongoDatastore. An existing Periodical is matched on
    its headline against the MODS title; when nothing matches, a new
    CreativeWork is built from the add_base result and inserted with
    '@type' set to 'Periodical'.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Periodical
    """
    creative_works = client.schema_org.CreativeWork
    title = mods.find("{{{0}}}titleInfo/{{{0}}}title".format(MODS_NS))
    headline = title.text if title is not None else None
    # Only search when there is a title to match on: a Mongo query for
    # {"headline": None} matches every Periodical whose headline is null
    # or missing, which would hand back an unrelated record.
    if headline is not None:
        existing_periodical = creative_works.find_one(
            {"@type": 'Periodical', "headline": headline})
        if existing_periodical is not None:
            return existing_periodical.get('_id')
    base_mods = add_base(mods, client, record_constants)
    periodical_dict = CreativeWork(**base_mods).as_dict()
    periodical_dict['@type'] = 'Periodical'
    return creative_works.insert(periodical_dict)
def get_or_add_periodical(mods, client, record_constants):
    """Takes a MODS etree and gets or adds a Periodical

    Periodical model uses the proposal at
    http://www.w3.org/community/schemabibex/ to add support for recurring
    resources in the MongoDatastore. An existing Periodical is matched on
    its headline against the MODS title; when nothing matches, a new
    CreativeWork is built from the add_base result and inserted with
    '@type' set to 'Periodical'.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Periodical
    """
    creative_works = client.schema_org.CreativeWork
    title = mods.find("{{{0}}}titleInfo/{{{0}}}title".format(MODS_NS))
    headline = title.text if title is not None else None
    # Only search when there is a title to match on: a Mongo query for
    # {"headline": None} matches every Periodical whose headline is null
    # or missing, which would hand back an unrelated record.
    if headline is not None:
        existing_periodical = creative_works.find_one(
            {"@type": 'Periodical', "headline": headline})
        if existing_periodical is not None:
            return existing_periodical.get('_id')
    base_mods = add_base(mods, client, record_constants)
    periodical_dict = CreativeWork(**base_mods).as_dict()
    periodical_dict['@type'] = 'Periodical'
    return creative_works.insert(periodical_dict)
def add_publication_volume(mods, client, volume, record_constants):
    """Takes a MODS etree and adds a PublicationVolume

    Builds a CreativeWork from the add_base result, records the volume
    number on it and inserts it into the schema_org CreativeWork
    collection with '@type' set to 'PublicationVolume'. No lookup for an
    existing volume is made; every call inserts a new document.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        volume: Value stored as the work's volumeNumber
        record_constants: Dictionary of Record constants

    Returns:
        Result of the CreativeWork collection insert (presumably the
        Mongo DB ObjectId of the new document -- confirm against the
        driver in use)
    """
    base_mods = add_base(mods, client, record_constants)
    publication_volume = CreativeWork(**base_mods)
    publication_volume.volumeNumber = volume
    pub_volume_dict = publication_volume.as_dict()
    pub_volume_dict['@type'] = 'PublicationVolume'
    return client.schema_org.CreativeWork.insert(pub_volume_dict)
def add_publication_issue(mods, client, issue_number, record_constants):
    """Takes a MODS etree and adds a PublicationIssue

    Builds a CreativeWork from the add_base result, records the issue
    number on it and inserts it into the schema_org CreativeWork
    collection with '@type' set to 'PublicationIssue'. No lookup for an
    existing issue is made; every call inserts a new document.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        issue_number: Value stored as the work's issueNumber
        record_constants: Dictionary of Record constants

    Returns:
        Result of the CreativeWork collection insert (presumably the
        Mongo DB ObjectId of the new document -- confirm against the
        driver in use)
    """
    base_mods = add_base(mods, client, record_constants)
    publication_issue = CreativeWork(**base_mods)
    publication_issue.issueNumber = issue_number
    pub_issue_dict = publication_issue.as_dict()
    pub_issue_dict['@type'] = 'PublicationIssue'
    return client.schema_org.CreativeWork.insert(pub_issue_dict)
def add_thesis(mods, client, record_constants):
    """Takes a MODS etree and adds a Thesis to the Mongo Datastore

    Creates a schema.org CreativeWork with genre 'thesis' from the
    add_base result together with a companion BIBFRAME Text work, and
    links the two (Text.relatedTo holds the thesis id, the thesis sameAs
    holds the Text id). A corporate mods:name with the 'sponsor' role
    becomes the thesis sourceOrganization.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants; its 'source'
            and 'msg' entries are passed to generate_record_info

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Thesis
    """
    schema_org = client.schema_org
    bibframe = client.bibframe
    base_mods = add_base(mods, client, record_constants)
    thesis = CreativeWork(**base_mods)
    thesis.genre = 'thesis'
    if thesis.copyrightHolder is None:
        thesis.copyrightHolder = []
    # Creators are recorded as copyright holders; a record without any
    # creator must not abort the whole insert.
    thesis.copyrightHolder.extend(base_mods.get('creator') or [])
    bf_text = bf_models.Text(
        recordInfo=generate_record_info(
            record_constants['source'],
            record_constants['msg']),
        title=base_mods.get('headline'))
    for name in mods.findall("{{{0}}}name".format(MODS_NS)):
        if name.attrib.get('type') != 'corporate':
            continue
        role = name.find("{{{0}}}role/{{{0}}}roleTerm".format(MODS_NS))
        org_name = name.find("{{{}}}namePart".format(MODS_NS))
        if org_name is None:
            # A corporate name without a namePart cannot be resolved.
            continue
        org_id = get_or_add_organization(
            org_name.text,
            client,
            record_constants)
        if org_id is None or role is None or role.text != 'sponsor':
            continue
        thesis.sourceOrganization = str(org_id)
        # NOTE(review): registering the organization as a department of
        # the publisher is assumed to apply to sponsors only -- confirm.
        if not thesis.publisher:
            continue
        publisher = schema_org.Organization.find_one(
            {'_id': ObjectId(thesis.publisher)})
        if publisher is None:
            # Publisher id does not resolve to a stored Organization.
            continue
        if publisher.get('department') is None:
            publisher['department'] = []
        if str(org_id) not in publisher['department']:
            publisher['department'].append(str(org_id))
            schema_org.Organization.update(
                {'_id': publisher.get('_id')},
                {'$set': {"department": publisher['department']}})
    if thesis.publisher:
        bf_organization = bibframe.Organization.find_one(
            {"relatedTo": thesis.publisher},
            {"_id": 1})
        # Leave dissertationInstitution unset when the publisher has no
        # BIBFRAME counterpart instead of failing on a None result.
        if bf_organization is not None:
            bf_text.dissertationInstitution = str(
                bf_organization.get('_id'))
    for note in mods.findall("{{{0}}}note".format(MODS_NS)):
        if note.attrib.get('type') == 'thesis' and \
           note.attrib.get('displayLabel') == "Degree Name":
            bf_text.dissertationDegree = note.text
    thesis_id = schema_org.CreativeWork.insert(thesis.as_dict())
    bf_text.relatedTo = [thesis_id]
    bf_text_id = bibframe.Work.insert(bf_text.as_dict())
    schema_org.CreativeWork.update(
        {"_id": thesis_id},
        {"$set": {'sameAs': [str(bf_text_id)]}})
    return thesis_id
def insert_mods(mods_xml, client):
    """Inserts a MODS XML datastream to MongoDB schema_org and bibframe
    collections.

    The kind of record is chosen from mods:genre (falling back to
    mods:subject/mods:genre), then from mods:typeOfResource. When neither
    identifies a type, a generic CreativeWork is inserted.

    Args:
        mods_xml: Raw MODS XML
        client: Mongo Client

    Returns:
        The id returned by the matching mods2ds handler, or by the
        CreativeWork insert for the generic fallback.

    Raises:
        Nothing is raised explicitly here; errors from etree.XML, the
        mods2ds handlers and the Mongo calls propagate to the caller.
    """
    mods = etree.XML(mods_xml)
    genre = mods.find("{{{0}}}genre".format(mods2ds.MODS_NS))
    if genre is None:
        # Try genre subject
        genre = mods.find(
            "{{{0}}}subject/{{{0}}}genre".format(mods2ds.MODS_NS))
    if genre is not None and genre.text is not None:
        genre_text = genre.text.lower()
        if genre_text in ('audio recording',
                          'interview',
                          'personal narratives'):
            return mods2ds.get_or_add_audio(mods, client, RECORD_CONSTANTS)
        if genre_text in ('newspaper', 'periodical'):
            return mods2ds.get_or_add_periodical(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith('history'):
            return mods2ds.get_or_add_article(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith(('photo', 'pict')):
            return mods2ds.get_or_add_photograph(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith(('thes', 'essay')):
            return mods2ds.add_thesis(mods, client, RECORD_CONSTANTS)
        if genre_text.startswith('videorecord'):
            return mods2ds.get_or_add_video(mods, client, RECORD_CONSTANTS)
    # Next try using type_of_resource value to guess type
    type_of_resource = mods.find(
        "{{{0}}}typeOfResource".format(mods2ds.MODS_NS))
    if type_of_resource is not None and type_of_resource.text is not None:
        resource_text = type_of_resource.text
        if resource_text.startswith('sound'):
            return mods2ds.get_or_add_audio(mods, client, RECORD_CONSTANTS)
        if resource_text.startswith('still image'):
            return mods2ds.get_or_add_photograph(
                mods, client, RECORD_CONSTANTS)
        if resource_text.startswith("text"):
            series = mods.find(
                "{{{0}}}relatedItem[@type='series']/"
                "{{{0}}}titleInfo/{{{0}}}title".format(mods2ds.MODS_NS))
            # Text that belongs to a series is stored as an article that
            # points at the series; other text falls through to the
            # generic CreativeWork below.
            if series is not None and series.text is not None:
                series_id = add_series(series, client)
                article_id = mods2ds.get_or_add_article(
                    mods, client, RECORD_CONSTANTS)
                client.schema_org.CreativeWork.update(
                    {"_id": article_id},
                    {"$set": {"isPartOf": str(series_id)}})
                return article_id
    # No matches, create a generic CreativeWork
    work = CreativeWork(**mods2ds.add_base(mods, client, RECORD_CONSTANTS))
    return client.schema_org.CreativeWork.insert(work.as_dict())
def add_thesis(mods, client, record_constants):
    """Takes a MODS etree and adds a Thesis to the Mongo Datastore

    Creates a schema.org CreativeWork with genre 'thesis' from the
    add_base result together with a companion BIBFRAME Text work, and
    links the two (Text.relatedTo holds the thesis id, the thesis sameAs
    holds the Text id). A corporate mods:name with the 'sponsor' role
    becomes the thesis sourceOrganization.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants; its 'source'
            and 'msg' entries are passed to generate_record_info

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Thesis
    """
    schema_org = client.schema_org
    bibframe = client.bibframe
    base_mods = add_base(mods, client, record_constants)
    thesis = CreativeWork(**base_mods)
    thesis.genre = 'thesis'
    if thesis.copyrightHolder is None:
        thesis.copyrightHolder = []
    # Creators are recorded as copyright holders; a record without any
    # creator must not abort the whole insert.
    thesis.copyrightHolder.extend(base_mods.get('creator') or [])
    bf_text = bf_models.Text(
        recordInfo=generate_record_info(
            record_constants['source'],
            record_constants['msg']),
        title=base_mods.get('headline'))
    for name in mods.findall("{{{0}}}name".format(MODS_NS)):
        if name.attrib.get('type') != 'corporate':
            continue
        role = name.find("{{{0}}}role/{{{0}}}roleTerm".format(MODS_NS))
        org_name = name.find("{{{}}}namePart".format(MODS_NS))
        if org_name is None:
            # A corporate name without a namePart cannot be resolved.
            continue
        org_id = get_or_add_organization(
            org_name.text,
            client,
            record_constants)
        if org_id is None or role is None or role.text != 'sponsor':
            continue
        thesis.sourceOrganization = str(org_id)
        # NOTE(review): registering the organization as a department of
        # the publisher is assumed to apply to sponsors only -- confirm.
        if not thesis.publisher:
            continue
        publisher = schema_org.Organization.find_one(
            {'_id': ObjectId(thesis.publisher)})
        if publisher is None:
            # Publisher id does not resolve to a stored Organization.
            continue
        if publisher.get('department') is None:
            publisher['department'] = []
        if str(org_id) not in publisher['department']:
            publisher['department'].append(str(org_id))
            schema_org.Organization.update(
                {'_id': publisher.get('_id')},
                {'$set': {"department": publisher['department']}})
    if thesis.publisher:
        bf_organization = bibframe.Organization.find_one(
            {"relatedTo": thesis.publisher},
            {"_id": 1})
        # Leave dissertationInstitution unset when the publisher has no
        # BIBFRAME counterpart instead of failing on a None result.
        if bf_organization is not None:
            bf_text.dissertationInstitution = str(
                bf_organization.get('_id'))
    for note in mods.findall("{{{0}}}note".format(MODS_NS)):
        if note.attrib.get('type') == 'thesis' and \
           note.attrib.get('displayLabel') == "Degree Name":
            bf_text.dissertationDegree = note.text
    thesis_id = schema_org.CreativeWork.insert(thesis.as_dict())
    bf_text.relatedTo = [thesis_id]
    bf_text_id = bibframe.Work.insert(bf_text.as_dict())
    schema_org.CreativeWork.update(
        {"_id": thesis_id},
        {"$set": {'sameAs': [str(bf_text_id)]}})
    return thesis_id