예제 #1
0
def get_or_add_periodical(mods, client, record_constants):
    """Takes a MODS etree and gets or adds a Periodical

    Periodical model uses the proposal at
    http://www.w3.org/community/schemabibex/ to add support for recurring
    resources in the MongoDatastore

    An existing Periodical is looked up by matching the MODS title against
    the stored ``headline``. When the record has no title text the lookup is
    skipped and a new Periodical is added.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Periodical
    """
    schema_org = client.schema_org
    title = mods.find("{{{0}}}titleInfo/{{{0}}}title".format(MODS_NS))
    # find() returns None when the record has no titleInfo/title element;
    # dereferencing .text unconditionally would raise AttributeError.
    headline = title.text if title is not None else None
    if headline is not None:
        # Only search when there is a real headline: querying for a None
        # headline could match an unrelated Periodical that has no headline.
        existing_periodical = schema_org.CreativeWork.find_one(
            {"@type": 'Periodical',
             "headline": headline})
        if existing_periodical is not None:
            return existing_periodical.get('_id')
    base_mods = add_base(mods, client, record_constants)
    periodical = CreativeWork(**base_mods)
    periodical_dict = periodical.as_dict()
    # CreativeWork carries no type of its own here; tag the stored document.
    periodical_dict['@type'] = 'Periodical'
    periodical_id = schema_org.CreativeWork.insert(periodical_dict)
    return periodical_id
예제 #2
0
def get_or_add_periodical(mods, client, record_constants):
    """Takes a MODS etree and gets or adds a Periodical

    Periodical model uses the proposal at
    http://www.w3.org/community/schemabibex/ to add support for recurring
    resources in the MongoDatastore

    An existing Periodical is looked up by matching the MODS title against
    the stored ``headline``. When the record has no title text the lookup is
    skipped and a new Periodical is added.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Periodical
    """
    schema_org = client.schema_org
    title = mods.find("{{{0}}}titleInfo/{{{0}}}title".format(MODS_NS))
    # find() returns None when the record has no titleInfo/title element;
    # dereferencing .text unconditionally would raise AttributeError.
    headline = None if title is None else title.text
    if headline is not None:
        # Only search when there is a real headline: querying for a None
        # headline could match an unrelated Periodical that has no headline.
        existing_periodical = schema_org.CreativeWork.find_one({
            "@type": 'Periodical',
            "headline": headline,
        })
        if existing_periodical is not None:
            return existing_periodical.get('_id')
    base_mods = add_base(mods, client, record_constants)
    periodical = CreativeWork(**base_mods)
    periodical_dict = periodical.as_dict()
    # CreativeWork carries no type of its own here; tag the stored document.
    periodical_dict['@type'] = 'Periodical'
    periodical_id = schema_org.CreativeWork.insert(periodical_dict)
    return periodical_id
예제 #3
0
def add_publication_volume(mods, client, volume, record_constants):
    """Takes a MODS etree and adds a PublicationVolume to the Mongo Datastore

    The base properties come from add_base; the resulting work is stored in
    the schema_org CreativeWork collection with ``@type`` set to
    'PublicationVolume'.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        volume: Value stored as the work's volumeNumber
        record_constants: Dictionary of Record constants

    Returns:
        The id returned by the CreativeWork collection insert
    """
    schema_org = client.schema_org
    base_mods = add_base(mods, client, record_constants)
    publication_volume = CreativeWork(**base_mods)
    publication_volume.volumeNumber = volume
    pub_volume_dict = publication_volume.as_dict()
    # Tag the stored document so it can be told apart from other works.
    pub_volume_dict['@type'] = 'PublicationVolume'
    pub_volume_id = schema_org.CreativeWork.insert(pub_volume_dict)
    return pub_volume_id
예제 #4
0
def add_publication_volume(mods, client, volume, record_constants):
    """Takes a MODS etree and adds a PublicationVolume to the Mongo Datastore

    The base properties come from add_base; the resulting work is stored in
    the schema_org CreativeWork collection with ``@type`` set to
    'PublicationVolume'.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        volume: Value stored as the work's volumeNumber
        record_constants: Dictionary of Record constants

    Returns:
        The id returned by the CreativeWork collection insert
    """
    schema_org = client.schema_org
    base_mods = add_base(mods, client, record_constants)
    publication_volume = CreativeWork(**base_mods)
    publication_volume.volumeNumber = volume
    pub_volume_dict = publication_volume.as_dict()
    # Tag the stored document so it can be told apart from other works.
    pub_volume_dict['@type'] = 'PublicationVolume'
    pub_volume_id = schema_org.CreativeWork.insert(pub_volume_dict)
    return pub_volume_id
예제 #5
0
def add_publication_issue(mods, client, issue_number, record_constants):
    """Takes a MODS etree and adds a PublicationIssue to the Mongo Datastore

    The base properties come from add_base; the resulting work is stored in
    the schema_org CreativeWork collection with ``@type`` set to
    'PublicationIssue'.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        issue_number: Value stored as the work's issueNumber
        record_constants: Dictionary of Record constants

    Returns:
        The id returned by the CreativeWork collection insert
    """
    schema_org = client.schema_org
    base_mods = add_base(mods, client, record_constants)
    publication_issue = CreativeWork(**base_mods)
    publication_issue.issueNumber = issue_number
    pub_issue_dict = publication_issue.as_dict()
    # Tag the stored document so it can be told apart from other works.
    pub_issue_dict['@type'] = 'PublicationIssue'
    pub_issue_id = schema_org.CreativeWork.insert(pub_issue_dict)

    return pub_issue_id
예제 #6
0
def add_publication_issue(mods, client, issue_number, record_constants):
    """Takes a MODS etree and adds a PublicationIssue to the Mongo Datastore

    The base properties come from add_base; the resulting work is stored in
    the schema_org CreativeWork collection with ``@type`` set to
    'PublicationIssue'.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        issue_number: Value stored as the work's issueNumber
        record_constants: Dictionary of Record constants

    Returns:
        The id returned by the CreativeWork collection insert
    """
    schema_org = client.schema_org
    base_mods = add_base(mods, client, record_constants)
    publication_issue = CreativeWork(**base_mods)
    publication_issue.issueNumber = issue_number
    pub_issue_dict = publication_issue.as_dict()
    # Tag the stored document so it can be told apart from other works.
    pub_issue_dict['@type'] = 'PublicationIssue'
    pub_issue_id = schema_org.CreativeWork.insert(pub_issue_dict)

    return pub_issue_id
예제 #7
0
def add_thesis(mods, client, record_constants):
    """Takes a MODS etree and adds a Thesis to the Mongo Datastore

    Function takes a MODS etree and creates a schema.org CreativeWork with
    genre 'thesis', inserts it into the schema_org CreativeWork collection,
    inserts a companion BIBFRAME Text into the bibframe Work collection and
    cross-links the two (``relatedTo`` on the BIBFRAME Text, ``sameAs`` on
    the CreativeWork).

    Each corporate mods:name whose role term is 'sponsor' becomes the
    thesis's sourceOrganization (the last one wins) and, when the thesis has
    a publisher, is appended to that publisher's ``department`` list.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants; the 'source' and
            'msg' entries are passed to generate_record_info

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Thesis
    """
    schema_org = client.schema_org
    bibframe = client.bibframe
    base_mods = add_base(mods, client, record_constants)
    thesis = CreativeWork(**base_mods)
    thesis.genre = 'thesis'
    if thesis.copyrightHolder is None:
        thesis.copyrightHolder = []
    # A record without any creator must not abort the whole import.
    thesis.copyrightHolder.extend(base_mods.get('creator') or [])
    bf_text = bf_models.Text(
        recordInfo=generate_record_info(record_constants['source'],
                                        record_constants['msg']),
        title=base_mods.get('headline'))
    for name in mods.findall("{{{0}}}name".format(MODS_NS)):
        if name.attrib.get('type') != 'corporate':
            continue
        org_name = name.find("{{{0}}}namePart".format(MODS_NS))
        if org_name is None:
            # No namePart means there is nothing to look up or add.
            continue
        # Every corporate name is registered, whatever its role.
        org_id = get_or_add_organization(org_name.text,
                                         client,
                                         record_constants)
        role = name.find("{{{0}}}role/{{{0}}}roleTerm".format(MODS_NS))
        if org_id is None or role is None or role.text != 'sponsor':
            continue
        sponsor_id = str(org_id)
        thesis.sourceOrganization = sponsor_id
        if not thesis.publisher:
            continue
        publisher = schema_org.Organization.find_one(
            {'_id': ObjectId(thesis.publisher)})
        if publisher is None:
            # The publisher id does not resolve to a stored Organization,
            # so there is no department list to extend.
            continue
        departments = publisher.get('department')
        if departments is None:
            departments = []
        if sponsor_id not in departments:
            departments.append(sponsor_id)
            schema_org.Organization.update(
                {'_id': publisher.get('_id')},
                {'$set': {"department": departments}})
    if thesis.publisher:
        bf_organization = bibframe.Organization.find_one(
            {"relatedTo": thesis.publisher},
            {"_id": 1})
        # find_one returns None when no BIBFRAME Organization is related to
        # the publisher; leave dissertationInstitution unset in that case.
        if bf_organization is not None:
            bf_text.dissertationInstitution = str(bf_organization.get('_id'))
    for note in mods.findall("{{{0}}}note".format(MODS_NS)):
        if (note.attrib.get('type') == 'thesis' and
                note.attrib.get('displayLabel') == "Degree Name"):
            bf_text.dissertationDegree = note.text
    thesis_id = schema_org.CreativeWork.insert(thesis.as_dict())
    bf_text.relatedTo = [thesis_id]
    bf_text_id = bibframe.Work.insert(bf_text.as_dict())
    # The BIBFRAME id only exists after its insert, so the back-link on the
    # CreativeWork has to be written as a follow-up update.
    schema_org.CreativeWork.update({"_id": thesis_id},
                                   {"$set": {'sameAs': [str(bf_text_id)]}})
    return thesis_id
예제 #8
0
def insert_mods(mods_xml, client):
    """Inserts a MODS XML datastream to MongoDB schema_org and bibframe
    collections.

    The kind of work is chosen from the record's genre (top-level
    mods:genre, falling back to mods:subject/mods:genre). When the genre
    does not select one, mods:typeOfResource is tried next. Records matched
    by neither are stored as a generic schema.org CreativeWork.

    Args:
        mods_xml: Raw MODS XML
        client: Mongo Client

    Returns:
        The id returned by the selected mods2ds helper, or the id of the
        newly inserted generic CreativeWork

    Raises:
        Nothing is caught here; errors from etree.XML or the helpers
        propagate to the caller.
    """
    mods = etree.XML(mods_xml)
    genre = mods.find("{{{0}}}genre".format(mods2ds.MODS_NS))

    if genre is None:
        # Try genre subject
        genre = mods.find(
            "{{{0}}}subject/{{{0}}}genre".format(mods2ds.MODS_NS))
    if genre is not None and genre.text is not None:
        # Genre matching is case-insensitive; lower-case once up front.
        genre_text = genre.text.lower()
        if genre_text in ('audio recording',
                          'interview',
                          'personal narratives'):
            return mods2ds.get_or_add_audio(mods, client, RECORD_CONSTANTS)
        if genre_text in ('newspaper', 'periodical'):
            return mods2ds.get_or_add_periodical(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith('history'):
            return mods2ds.get_or_add_article(mods, client, RECORD_CONSTANTS)
        if genre_text.startswith(('photo', 'pict')):
            return mods2ds.get_or_add_photograph(
                mods, client, RECORD_CONSTANTS)
        if genre_text.startswith(('thes', 'essay')):
            return mods2ds.add_thesis(mods, client, RECORD_CONSTANTS)
        if genre_text.startswith('videorecord'):
            return mods2ds.get_or_add_video(mods, client, RECORD_CONSTANTS)

    # Next try using type_of_resource value to guess type
    # (unlike genre, this comparison is case-sensitive)
    type_of_resource = mods.find(
        "{{{0}}}typeOfResource".format(mods2ds.MODS_NS))
    if type_of_resource is not None and type_of_resource.text is not None:
        resource_text = type_of_resource.text
        if resource_text.startswith('sound'):
            return mods2ds.get_or_add_audio(mods, client, RECORD_CONSTANTS)
        if resource_text.startswith('still image'):
            return mods2ds.get_or_add_photograph(
                mods, client, RECORD_CONSTANTS)
        if resource_text.startswith("text"):
            series = mods.find(
                "{{{0}}}relatedItem[@type='series']"
                "/{{{0}}}titleInfo/{{{0}}}title".format(mods2ds.MODS_NS))
            if series is not None and series.text is not None:
                series_id = add_series(series, client)
                article_id = mods2ds.get_or_add_article(
                    mods, client, RECORD_CONSTANTS)
                client.schema_org.CreativeWork.update(
                    {"_id": article_id},
                    {"$set": {"isPartOf": str(series_id)}})
                return article_id
            # Text without a series title falls through to the generic
            # CreativeWork below.
    # No matches, create a generic CreativeWork
    work = CreativeWork(**mods2ds.add_base(mods, client, RECORD_CONSTANTS))
    work_id = client.schema_org.CreativeWork.insert(work.as_dict())
    return work_id
예제 #9
0
def add_thesis(mods, client, record_constants):
    """Takes a MODS etree and adds a Thesis to the Mongo Datastore

    Function takes a MODS etree and creates a schema.org CreativeWork with
    genre 'thesis', inserts it into the schema_org CreativeWork collection,
    inserts a companion BIBFRAME Text into the bibframe Work collection and
    cross-links the two (``relatedTo`` on the BIBFRAME Text, ``sameAs`` on
    the CreativeWork).

    Each corporate mods:name whose role term is 'sponsor' becomes the
    thesis's sourceOrganization (the last one wins) and, when the thesis has
    a publisher, is appended to that publisher's ``department`` list.

    Args:
        mods: MODS XML etree
        client: Mongo DB Client
        record_constants: Dictionary of Record constants; the 'source' and
            'msg' entries are passed to generate_record_info

    Returns:
        ObjectId: Mongo DB ObjectId for the schema.org Thesis
    """
    schema_org = client.schema_org
    bibframe = client.bibframe
    base_mods = add_base(mods, client, record_constants)
    thesis = CreativeWork(**base_mods)
    thesis.genre = 'thesis'
    if thesis.copyrightHolder is None:
        thesis.copyrightHolder = []
    # A record without any creator must not abort the whole import.
    thesis.copyrightHolder.extend(base_mods.get('creator') or [])
    bf_text = bf_models.Text(
        recordInfo=generate_record_info(record_constants['source'],
                                        record_constants['msg']),
        title=base_mods.get('headline'))
    for name in mods.findall("{{{0}}}name".format(MODS_NS)):
        if name.attrib.get('type') != 'corporate':
            continue
        org_name = name.find("{{{0}}}namePart".format(MODS_NS))
        if org_name is None:
            # No namePart means there is nothing to look up or add.
            continue
        # Every corporate name is registered, whatever its role.
        org_id = get_or_add_organization(org_name.text, client,
                                         record_constants)
        role = name.find("{{{0}}}role/{{{0}}}roleTerm".format(MODS_NS))
        if org_id is None or role is None or role.text != 'sponsor':
            continue
        sponsor_id = str(org_id)
        thesis.sourceOrganization = sponsor_id
        if not thesis.publisher:
            continue
        publisher = schema_org.Organization.find_one(
            {'_id': ObjectId(thesis.publisher)})
        if publisher is None:
            # The publisher id does not resolve to a stored Organization,
            # so there is no department list to extend.
            continue
        departments = publisher.get('department')
        if departments is None:
            departments = []
        if sponsor_id not in departments:
            departments.append(sponsor_id)
            schema_org.Organization.update(
                {'_id': publisher.get('_id')}, {
                    '$set': {
                        "department": departments
                    }
                })
    if thesis.publisher:
        bf_organization = bibframe.Organization.find_one(
            {"relatedTo": thesis.publisher}, {"_id": 1})
        # find_one returns None when no BIBFRAME Organization is related to
        # the publisher; leave dissertationInstitution unset in that case.
        if bf_organization is not None:
            bf_text.dissertationInstitution = str(bf_organization.get('_id'))
    for note in mods.findall("{{{0}}}note".format(MODS_NS)):
        if (note.attrib.get('type') == 'thesis' and
                note.attrib.get('displayLabel') == "Degree Name"):
            bf_text.dissertationDegree = note.text
    thesis_id = schema_org.CreativeWork.insert(thesis.as_dict())
    bf_text.relatedTo = [
        thesis_id,
    ]
    bf_text_id = bibframe.Work.insert(bf_text.as_dict())
    # The BIBFRAME id only exists after its insert, so the back-link on the
    # CreativeWork has to be written as a follow-up update.
    schema_org.CreativeWork.update({"_id": thesis_id},
                                   {"$set": {
                                       'sameAs': [str(bf_text_id)]
                                   }})
    return thesis_id