예제 #1
0
 class Extra:
     """Name-identifier values read from ``ctx.nameIdentifier``."""

     # ``force_text`` is handed to RunPython together with the identifier.
     name_identifier = tools.Try(
         tools.RunPython(force_text, ctx.nameIdentifier),
     )
     # ``@``-prefixed keys looked up on the identifier.
     name_identifier_scheme = tools.Try(
         ctx.nameIdentifier['@nameIdentifierScheme'],
     )
     name_identifier_scheme_uri = tools.Try(
         ctx.nameIdentifier['@schemeURI'],
     )
예제 #2
0
 class Extra:
     """Award and awardee values copied from ``ctx``."""

     # Fields read directly from ``ctx``.
     funds_obligated_amt = ctx.fundsObligatedAmt
     award_id = ctx.id
     # Wrapped in ``tools.Try`` — presumably optional in the source; confirm.
     awardee_name = tools.Try(ctx.awardeeName)
     awardee_city = ctx.awardeeCity
     awardee_state_code = tools.Try(ctx.awardeeStateCode)
     date = ctx.date
예제 #3
0
파일: normalizer.py 프로젝트: sf2ne/SHARE
class Preprint(Parser):
    """Parser declaring how a preprint is read from ``ctx.entry``."""

    title = ctx.entry.title
    description = ctx.entry.summary
    date_published = tools.ParseDate(ctx.entry.published)
    date_updated = tools.ParseDate(ctx.entry.updated)
    contributors = tools.Map(tools.Delegate(Contributor), ctx.entry.author)

    # Links are built from the ``arxiv:doi`` value (wrapped in Try) and the
    # entry id.
    links = tools.Map(
        tools.Delegate(ThroughLinks),
        tools.Try(ctx.entry['arxiv:doi']),
        ctx.entry.id,
    )

    # ``get_subjects`` pulls the terms out of the categories before they are
    # passed to ``tools.Subjects``.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Subjects(
            tools.RunPython(
                'get_subjects',
                tools.Concat(tools.Try(ctx.entry.category)),
            ),
        ),
    )

    tags = tools.Map(tools.Delegate(ThroughTags), ctx.entry.category)

    class Extra:
        """Additional entry values, including the ``arxiv:``-prefixed keys."""

        resource_id = ctx.entry.id
        journal_ref = tools.Try(ctx.entry['arxiv:journal_ref'])
        comment = tools.Try(ctx.entry['arxiv:comment'])
        primary_category = tools.Try(ctx.entry['arxiv:primary_category'])

    def get_subjects(self, link):
        """Return the ``'@term'`` value of each category in ``link`` as a list."""
        return [category['@term'] for category in link]
예제 #4
0
class CreativeWork(Parser):
    """Parser for a work described by an HTML page wrapped in ``ctx``.

    Most fields are located with ``Soup`` lookups; values listed in the
    page's ``dt``/``dd`` pairs are fetched through ``get_dd``.
    """

    schema = tools.RunPython('get_type', ctx)

    title = tools.RunPython('get_title', ctx)
    description = Soup(ctx, 'p', class_='genericfile_description')['#text']
    date_published = tools.ParseDate(
        Soup(ctx, itemprop='datePublished')['#text'],
    )
    date_updated = tools.ParseDate(
        Soup(ctx, itemprop='dateModified')['#text'],
    )
    # Alternatives in order: the rights link, the rights text, then None.
    rights = tools.OneOf(
        tools.RunPython('get_rights_url', ctx),
        tools.RunPython('get_dd', ctx, 'Rights')['#text'],
        tools.Static(None),
    )
    language = tools.Try(
        tools.ParseLanguage(Soup(ctx, itemprop='inLanguage')['#text']),
    )

    tags = tools.Map(
        tools.Delegate(ThroughTags),
        Soup(ctx, itemprop='keywords'),
    )

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(tools.RunPython('get_dd', ctx, 'Permanent Link')),
    )

    related_agents = tools.Concat(
        tools.Map(
            tools.Delegate(Creator),
            Soup(ctx, itemprop='creator'),
        ),
        tools.Map(
            tools.Delegate(Contributor),
            Soup(ctx, itemprop='contributor'),
        ),
        tools.Map(
            tools.Delegate(Publisher),
            Soup(ctx, itemprop='publisher'),
        ),
    )

    class Extra:
        """Text of further ``dt``/``dd`` entries, looked up by their label."""

        gwu_unit = tools.RunPython('get_dd', ctx, 'GW Unit')['#text']
        related_url = tools.RunPython('get_dd', ctx, 'Related URL')['#text']
        previous_publication_information = tools.RunPython(
            'get_dd',
            ctx,
            'Previous Publication Information',
        )['#text']
        depositor = tools.RunPython('get_dd', ctx, 'Depositor')['#text']
        characterization = tools.RunPython(
            'get_dd',
            ctx,
            'Characterization',
        )['#text']

    def get_type(self, obj):
        """Map the first ``div``'s ``itemtype`` URL to a type name.

        Unknown URLs fall back to ``'CreativeWork'``.
        """
        known_types = {
            'http://schema.org/CreativeWork': 'CreativeWork',
            'http://schema.org/Article': 'Article',
            'http://schema.org/Book': 'Book',
        }
        item_type = obj.soup.find('div')['itemtype']
        return known_types.get(item_type, 'CreativeWork')

    def get_title(self, obj):
        """Return the text of the ``h1`` with its ``label`` span removed.

        The span is removed from the underlying soup in place.
        """
        heading = obj.h1.soup
        label = heading.find('span', class_='label')
        label.decompose()
        return heading.get_text()

    def get_dd(self, obj, dt):
        """Return the ``dd`` following the ``dt`` whose string is ``dt``.

        The ``dd`` is wrapped in ``SoupXMLDict``; ``None`` is returned when
        no matching ``dt`` is found.
        """
        term = obj.soup.find('dt', string=dt)
        if not term:
            return None
        return SoupXMLDict(soup=term.find_next_sibling('dd'))

    def get_rights_url(self, obj):
        """Return the ``href`` of the parent of the new-window icon in 'Rights'."""
        rights_dd = self.get_dd(obj, 'Rights')
        icon = rights_dd.soup.find('i', class_='glyphicon-new-window')
        return icon.parent['href']
예제 #5
0
    class Extra:
        """Name-identifier and contributor-type values read from ``ctx``."""

        name_identifier = tools.Try(ctx.nameIdentifier)
        # ``@``-prefixed keys looked up on the identifier.
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )

        contributor_type = tools.Try(ctx.contributorType)
예제 #6
0
class Venue(Parser):
    """Parser declaring a venue's ``name`` and ``location`` from awardee fields."""

    name = tools.Try(ctx.awardeeName)
    # City and state code (the latter wrapped in Try) joined with ', '.
    location = tools.Join(
        tools.Concat(
            ctx.awardeeCity,
            tools.Try(ctx.awardeeStateCode),
        ),
        joiner=', ',
    )

    class Extra:
        """The same city and state-code sources, kept as separate values."""

        awardee_city = ctx.awardeeCity
        awardee_state_code = tools.Try(ctx.awardeeStateCode)
예제 #7
0
class Person(Parser):
    """Parser for a person whose name parts come from ``ctx.author``."""

    # Each name part is an attribute of ``tools.ParseName(ctx.author)``.
    given_name = tools.ParseName(ctx.author).first
    family_name = tools.ParseName(ctx.author).last
    additional_name = tools.ParseName(ctx.author).middle
    suffix = tools.ParseName(ctx.author).suffix

    # NOTE(review): the Try chain is passed to ``Delegate`` here, whereas
    # ``related_agents`` passes it to ``Map`` - confirm this is intended.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier, tools.Try(ctx.email)),
    )
    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Try(ctx.institution),
    )
예제 #8
0
파일: normalizer.py 프로젝트: sf2ne/SHARE
class Person(Parser):
    """Parser for a person with emails and organization affiliations."""

    # Each name part is an attribute of ``tools.ParseName(ctx.author)``.
    given_name = tools.ParseName(ctx.author).first
    family_name = tools.ParseName(ctx.author).last
    additional_name = tools.ParseName(ctx.author).middle
    suffix = tools.ParseName(ctx.author).suffix

    emails = tools.Map(
        tools.Delegate(PersonEmail),
        tools.Try(ctx.email),
    )

    # ``Affiliation`` is specialised so its ``entity`` delegates to
    # ``Organization``.
    affiliations = tools.Map(
        tools.Delegate(
            Affiliation.using(entity=tools.Delegate(Organization)),
        ),
        tools.Try(ctx.institution),
    )
예제 #9
0
class Funder(Parser):
    """Parser declaring a funder's ``community_identifier``."""

    # The identifier's ``@schemeURI`` and ``#text`` are joined with '/'.
    community_identifier = tools.Join(
        tools.Concat(
            tools.Try(ctx.nameIdentifier['@schemeURI']),
            tools.Try(ctx.nameIdentifier['#text']),
        ),
        joiner='/',
    )

    class Extra:
        """Contributor name and identifier-scheme values read from ``ctx``."""

        name = tools.Try(ctx.contributorName)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )
예제 #10
0
    class Extra:
        """Record-header and ``DIF`` metadata values read from ``ctx.record``."""

        status = tools.Try(ctx.record.header['@status'])
        entry_id = tools.Try(ctx.record.metadata.DIF.Entry_ID)
        metadata_name = tools.Try(ctx.record.metadata.DIF.Metadata_Name)
        metadata_version = tools.Try(ctx.record.metadata.DIF.Metadata_Version)
        last_dif_revision_date = tools.Try(
            ctx.record.metadata.DIF.Last_DIF_Revision_Date,
        )

        # Unlike the fields above, this one is not wrapped in ``tools.Try``.
        set_spec = ctx.record.header.setSpec
예제 #11
0
    class Extra:
        """
        Fields that the base parser combines are listed again here as single
        elements matching their original entry, to preserve the raw data
        structure.
        """

        # An entity responsible for making contributions to the resource.
        contributor = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'],
            'dc:contributor',
        )

        # The spatial or temporal topic of the resource, the spatial
        # applicability of the resource, or the jurisdiction under which the
        # resource is relevant.
        coverage = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'],
            'dc:coverage',
        )

        # An entity primarily responsible for making the resource.
        creator = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'],
            'dc:creator',
        )

        # A point or period of time associated with an event in the lifecycle
        # of the resource.
        dates = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'],
            'dc:date',
        )

        # The file format, physical medium, or dimensions of the resource.
        resource_format = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'],
            'dc:format',
        )

        # An unambiguous reference to the resource within a given context.
        identifiers = tools.Concat(
            tools.Try(ctx['record']['metadata']['dc']['dc:identifier']),
            tools.Maybe(ctx['record']['header'], 'identifier'),
        )

        # A related resource.
        relation = tools.RunPython('get_relation', ctx)

        # A related resource from which the described resource is derived.
        source = tools.Maybe(
            tools.Maybe(ctx['record'], 'metadata')['dc'],
            'dc:source',
        )

        # The topic of the resource.
        subject = tools.Try(ctx.record.metadata.dc['dc:subject'])

        # The nature or genre of the resource.
        resource_type = tools.Try(ctx.record.metadata.dc['dc:type'])

        set_spec = tools.Maybe(ctx.record.header, 'setSpec')

        # The language is also stored here in case the reported value cannot
        # be parsed by ParseLanguage.
        language = tools.Try(ctx.record.metadata.dc['dc:language'])

        # Status from the header; it exists if the resource is deleted.
        status = tools.Maybe(ctx.record.header, '@status')
예제 #12
0
파일: v1_push.py 프로젝트: sheriefvt/SHARE
 class Extra:
     """
     Fields that are combined in the base parser are relisted as singular elements that match
     their original entry to preserve raw data structure.
     """
     # Every value is read from the same-named attribute of ``ctx`` and
     # wrapped in ``tools.Try``.
     freeToRead = tools.Try(ctx.freeToRead)
     languages = tools.Try(ctx.languages)
     licenses = tools.Try(ctx.licenses)
     otherProperties = tools.Try(ctx.otherProperties)
     publisher = tools.Try(ctx.publisher)
     subjects = tools.Try(ctx.subjects)
     sponsorships = tools.Try(ctx.sponsorships)
     tags = tools.Try(ctx.tags)
     uris = tools.Try(ctx.uris)
     version = tools.Try(ctx.version)
예제 #13
0
class Preprint(Parser):
    """Parser declaring how a preprint is read from ``ctx.entry``."""

    title = ctx.entry.title
    description = ctx.entry.summary

    date_published = tools.ParseDate(ctx.entry.published)
    date_updated = tools.ParseDate(ctx.entry.updated)
    # Not mapped here: free_to_read_type, free_to_read_date, rights, language.

    # Subjects and tags both start from the ``@term`` of each category.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Subjects(
            tools.Map(ctx['@term'], ctx.entry.category),
        ),
    )
    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Map(ctx['@term'], ctx.entry.category),
    )
    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Creator), ctx.entry.author),
    )
    # Not mapped here: related_works.

    # Identifiers come from the ``arxiv:doi`` value (wrapped in Try) and the
    # entry id.
    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(ctx.entry['arxiv:doi']),
        ctx.entry.id,
    )

    class Extra:
        """Additional entry values, including the ``arxiv:``-prefixed keys."""

        resource_id = ctx.entry.id
        journal_ref = tools.Try(ctx.entry['arxiv:journal_ref'])
        comment = tools.Try(ctx.entry['arxiv:comment'])
        primary_category = tools.Try(ctx.entry['arxiv:primary_category'])
예제 #14
0
class Person(Parser):
    """Parser for a person embedded under ``ctx.embeds.users``.

    Each name part lists two alternatives: the value under
    ``data.attributes`` and the one under ``errors[0].meta``.
    """

    given_name = tools.OneOf(
        ctx.embeds.users.data.attributes.given_name,
        ctx.embeds.users.errors[0].meta.given_name,
    )
    family_name = tools.OneOf(
        ctx.embeds.users.data.attributes.family_name,
        ctx.embeds.users.errors[0].meta.family_name,
    )
    additional_name = tools.OneOf(
        ctx.embeds.users.data.attributes.middle_names,
        ctx.embeds.users.errors[0].meta.middle_names,
    )
    suffix = tools.OneOf(
        ctx.embeds.users.data.attributes.suffix,
        ctx.embeds.users.errors[0].meta.suffix,
    )

    # The html link is passed through ``registered`` before being used.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.RunPython('registered', ctx.embeds.users.data.links.html),
        tools.Try(ctx.embeds.users.data.links.profile_image),
    )

    class Extra:
        """Further user attributes, each wrapped in ``tools.Try``."""

        locale = tools.Try(ctx.embeds.users.data.attributes.locale)
        date_registered = tools.Try(
            ctx.embeds.users.data.attributes.date_registered,
        )
        active = tools.Try(ctx.embeds.users.data.attributes.active)
        timezone = tools.Try(ctx.embeds.users.data.attributes.timezone)

    def registered(self, context):
        """Return ``context``, or ``None`` for an unregistered contributor.

        The flag is read from
        ``self.context['attributes']['unregistered_contributor']``.
        """
        unregistered = self.context['attributes']['unregistered_contributor']
        return None if unregistered else context
예제 #15
0
파일: v1_push.py 프로젝트: sheriefvt/SHARE
class FundingAgent(Parser):
    """Parser for a funding agent named by ``ctx.sponsorName``."""

    # The agent type is guessed from the name, defaulting to 'organization'.
    schema = tools.GuessAgentType(ctx.sponsorName, default='organization')

    name = ctx.sponsorName

    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.IRI(tools.Try(ctx.sponsorIdentifier)),
    )
예제 #16
0
    class Extra:
        """Name-identifier, contributor-type and name-part values from ``ctx``."""

        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )

        contributor_type = tools.Try(ctx.contributorType)

        # v.4 new givenName and familyName properties; alternatives are the
        # creator name, then the contributor name, then None.
        given_name = tools.OneOf(
            ctx.creatorName['@givenName'],
            ctx.contributorName['@givenName'],
            tools.Static(None),
        )
        family_name = tools.OneOf(
            ctx.creatorName['@familyName'],
            ctx.contributorName['@familyName'],
            tools.Static(None),
        )
예제 #17
0
class FunderAgent(Parser):
    """Parser for a funder named by ``funderName`` or ``contributorName``."""

    # The agent type is guessed from the name, defaulting to 'organization'.
    schema = tools.GuessAgentType(
        tools.OneOf(ctx.funderName, ctx.contributorName),
        default='organization',
    )

    name = tools.OneOf(ctx.funderName, ctx.contributorName)

    # Candidate identifiers in order: funderIdentifier, the name identifier
    # given to ``force_text``, then None. The Try around ``IRI`` names
    # ValueError as its exception.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(
            tools.IRI(
                tools.OneOf(
                    ctx.funderIdentifier,
                    tools.RunPython(force_text, ctx.nameIdentifier),
                    tools.Static(None),
                ),
            ),
            exceptions=(ValueError,),
        ),
    )

    class Extra:
        """Identifier and contributor-type values, each wrapped in Try."""

        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )

        funder_identifier = tools.Try(ctx.funderIdentifier)
        funder_identifier_type = tools.Try(ctx.funderIdentifierType)

        contributor_type = tools.Try(ctx.contributorType)
예제 #18
0
파일: v1_push.py 프로젝트: sheriefvt/SHARE
class Agent(Parser):
    """Parser for an agent named by ``ctx.name``."""

    schema = tools.GuessAgentType(ctx.name)

    name = ctx.name

    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Try(ctx.affiliation),
    )

    # ``sameAs`` and ``email`` values go through ``tools.IRI()`` first.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Map(
            tools.IRI(),
            tools.Try(ctx.sameAs),
            tools.Try(ctx.email),
        ),
    )

    class Extra:
        """Name values read from ``ctx``, each wrapped in ``tools.Try``."""

        givenName = tools.Try(ctx.givenName)
        familyName = tools.Try(ctx.familyName)
        # NOTE(review): the attribute is spelled ``additonalName`` while the
        # source field is ``additionalName``; left unchanged because the
        # attribute name may be used as a stored key - confirm before renaming.
        additonalName = tools.Try(ctx.additionalName)
        name = tools.Try(ctx.name)
예제 #19
0
class RelatedLink(Parser):
    """Parser declaring a ``Link`` built from a related-identifier element.

    ``url`` hands the context to ``force_text``; ``type`` hands the
    ``@relatedIdentifierType`` value to :meth:`lower`.
    """

    schema = 'Link'

    url = tools.RunPython(force_text, ctx)
    type = tools.RunPython('lower', tools.Try(ctx['@relatedIdentifierType']))

    def lower(self, type):
        """Return ``type`` lower-cased, or ``None`` when no type is given.

        The argument is produced by ``tools.Try``.
        NOTE(review): this assumes ``Try`` yields ``None`` when the attribute
        is missing - confirm. Without the guard that case raises
        ``AttributeError`` instead of leaving the field empty.

        The parameter keeps its original name ``type`` (shadowing the
        builtin) so existing keyword callers are unaffected.
        """
        if type is None:
            return None
        return type.lower()
예제 #20
0
class Project(CreativeWork):
    """Root ``CreativeWork`` parser with child works and related agents."""

    is_root = True
    related_works = tools.Map(
        tools.Delegate(IsPartOf),
        tools.Try(ctx.children),
    )

    # Contributors are split on ``attributes.bibliographic``: truthy ones
    # are delegated to ``Creator``, the rest to ``Contributor``.
    related_agents = tools.Concat(
        tools.Map(
            tools.Delegate(Creator),
            tools.Filter(
                lambda x: x['attributes']['bibliographic'],
                ctx.contributors,
            ),
        ),
        tools.Map(
            tools.Delegate(Contributor),
            tools.Filter(
                lambda x: not x['attributes']['bibliographic'],
                ctx.contributors,
            ),
        ),
        tools.Map(
            tools.Delegate(AgentWorkRelation),
            tools.Try(ctx.institutions),
        ),
    )
예제 #21
0
class Person(Parser):
    """Parser for a person read from ``ctx.embeds.users.data``."""

    given_name = ctx.embeds.users.data.attributes.given_name
    family_name = ctx.embeds.users.data.attributes.family_name
    additional_name = ctx.embeds.users.data.attributes.middle_names
    suffix = ctx.embeds.users.data.attributes.suffix
    # Identifier sources: the html link, the profile image, and the profile
    # image found under ``errors[0].meta``.
    identifiers = tools.Map(
        tools.Delegate(ThroughIdentifiers),
        tools.Try(ctx.embeds.users.data.links.html),
        tools.Try(ctx.embeds.users.data.links.profile_image),
        tools.Try(ctx.embeds.users.errors[0].meta.profile_image),
    )

    class Extra:
        """Further user attributes and the profile image, read directly."""

        locale = ctx.embeds.users.data.attributes.locale
        date_registered = ctx.embeds.users.data.attributes.date_registered
        active = ctx.embeds.users.data.attributes.active
        timezone = ctx.embeds.users.data.attributes.timezone
        profile_image = ctx.embeds.users.data.links.profile_image
예제 #22
0
class Venue(Parser):
    """Parser declaring a venue's ``name`` from ``ctx.geoLocationPlace``."""

    name = tools.Try(
        tools.RunPython(force_text, ctx.geoLocationPlace),
    )

    # Disabled as top-level fields; the same sources are kept in Extra:
    # polygon = tools.Try(ctx.geoLocationBox)
    # point = tools.Try(ctx.geoLocationPoint)

    class Extra:
        """Geo-location box and point values, each wrapped in Try."""

        polygon = tools.Try(ctx.geoLocationBox)
        point = tools.Try(ctx.geoLocationPoint)
예제 #23
0
class FunderRelation(Parser):
    """Parser declaring a ``Funder`` relation with its agent and awards."""

    schema = 'Funder'

    agent = tools.Delegate(FunderAgent, ctx)
    awards = tools.Map(
        tools.Delegate(ThroughAwards),
        tools.Try(tools.RunPython('get_award', ctx)),
    )

    def get_award(self, obj):
        """Return ``obj`` unchanged after looking up its ``'awardURI'`` key.

        The lookup result is discarded.
        NOTE(review): the lookup presumably exists so a missing key raises
        and the surrounding ``Try`` skips the award - confirm.
        """
        obj['awardURI']
        return obj
예제 #24
0
파일: normalizer.py 프로젝트: sf2ne/SHARE
class Person(Parser):
    """Parser for a person whose name parts come from ``ctx.name``."""

    # Each name part is an attribute of ``tools.ParseName(ctx.name)``.
    given_name = tools.ParseName(ctx.name).first
    family_name = tools.ParseName(ctx.name).last
    additional_name = tools.ParseName(ctx.name).middle
    suffix = tools.ParseName(ctx.name).suffix

    # ``Affiliation`` is specialised so its ``entity`` delegates to
    # ``Organization``.
    affiliations = tools.Map(
        tools.Delegate(
            Affiliation.using(entity=tools.Delegate(Organization)),
        ),
        tools.Try(ctx['arxiv:affiliation']),
    )
예제 #25
0
class DataCenterAgent(Parser):
    """Parser for a data center named by ``Data_Center_Name.Long_Name``."""

    # The agent type is guessed from the name, defaulting to 'organization'.
    schema = tools.GuessAgentType(
        ctx.Data_Center_Name.Long_Name,
        default='organization',
    )

    name = ctx.Data_Center_Name.Long_Name
    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Try(ctx.Personnel),
    )

    class Extra:
        """The data center's short name, read directly from ``ctx``."""

        data_center_short_name = ctx.Data_Center_Name.Short_Name
예제 #26
0
class AffiliatedAgent(Parser):
    """Parser for an agent named by ``ctx.awardeeName``."""

    # The agent type is guessed from the name, defaulting to 'organization'.
    schema = tools.GuessAgentType(ctx.awardeeName, default='organization')

    name = ctx.awardeeName
    # City and state code (the latter wrapped in Try) joined with ', '.
    location = tools.Join(
        tools.Concat(
            ctx.awardeeCity,
            tools.Try(ctx.awardeeStateCode),
        ),
        joiner=', ',
    )

    class Extra:
        """The same city and state-code sources, kept as separate values."""

        awardee_city = ctx.awardeeCity
        awardee_state_code = tools.Try(ctx.awardeeStateCode)
예제 #27
0
class CreatorPerson(Parser):
    """Parser declaring a ``Person`` from ``ctx.creatorName``."""

    schema = 'Person'

    # Each name part is an attribute of ``tools.ParseName(ctx.creatorName)``.
    suffix = tools.ParseName(ctx.creatorName).suffix
    family_name = tools.ParseName(ctx.creatorName).last
    given_name = tools.ParseName(ctx.creatorName).first
    additional_name = tools.ParseName(ctx.creatorName).middle
    # ``Affiliation`` is specialised so its ``entity`` delegates to
    # ``CreatorOrganization``.
    affiliations = tools.Map(
        tools.Delegate(
            Affiliation.using(entity=tools.Delegate(CreatorOrganization)),
        ),
        tools.Concat(
            tools.Try(tools.RunPython(force_text, ctx.affiliation)),
        ),
    )
    identifiers = tools.Map(
        tools.Delegate(ThroughIdentifiers),
        tools.Try(ctx.nameIdentifier),
    )

    class Extra:
        """Name-identifier values read from ``ctx.nameIdentifier``."""

        name_identifier = tools.Try(
            tools.RunPython(force_text, ctx.nameIdentifier),
        )
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )
예제 #28
0
class ContributorAgent(Parser):
    """Parser for an agent named by ``creatorName`` or ``contributorName``."""

    # Two alternatives: a guess based on ``get_agent_type`` (called with
    # ``person=False``, default 'organization'), then a guess from the name.
    schema = tools.OneOf(
        tools.GuessAgentType(
            tools.RunPython(get_agent_type, ctx, person=False),
            default='organization',
        ),
        tools.GuessAgentType(
            tools.OneOf(ctx.creatorName, ctx.contributorName),
        ),
    )

    name = tools.OneOf(ctx.creatorName, ctx.contributorName)
    # The Try around ``IRI`` names ValueError as its exception.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(
            tools.IRI(tools.RunPython(force_text, ctx.nameIdentifier)),
            exceptions=(ValueError,),
        ),
    )
    # Falsy affiliation values are filtered out before delegation.
    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Concat(
            tools.Try(
                tools.Filter(
                    lambda x: bool(x),
                    tools.RunPython(force_text, ctx.affiliation),
                ),
            ),
        ),
    )

    class Extra:
        """Name-identifier, contributor-type and name-part values from ``ctx``."""

        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )

        contributor_type = tools.Try(ctx.contributorType)

        # v.4 new givenName and familyName properties; alternatives are the
        # creator name, then the contributor name, then None.
        given_name = tools.OneOf(
            ctx.creatorName['@givenName'],
            ctx.contributorName['@givenName'],
            tools.Static(None),
        )
        family_name = tools.OneOf(
            ctx.creatorName['@familyName'],
            ctx.contributorName['@familyName'],
            tools.Static(None),
        )
예제 #29
0
class Preprint(osf.Project):
    """Preprint parser extending ``osf.Project`` with preprint fields."""

    description = tools.Try(ctx.attributes.abstract)
    date_updated = tools.ParseDate(ctx.attributes.date_modified)
    date_published = tools.ParseDate(ctx.attributes.date_created)
    # NOTE: OSF has a direct mapping to SHARE's taxonomy, so Subjects() is
    # not needed.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        ctx.attributes.subjects,
    )
    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        ctx.links.self,
        ctx.links.html,
        tools.Try(ctx.links.doi),
    )
    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Try(ctx.attributes.tags),
    )
    rights = tools.Try(ctx.attributes.node_license)

    # A static empty list.
    related_works = tools.Static([])
    # Contributors are split on ``attributes.bibliographic``: truthy ones
    # are delegated to ``osf.Creator``, the rest to ``osf.Contributor``.
    related_agents = tools.Concat(
        tools.Map(
            tools.Delegate(osf.Creator),
            tools.Filter(
                lambda x: x['attributes']['bibliographic'],
                ctx.contributors,
            ),
        ),
        tools.Map(
            tools.Delegate(osf.Contributor),
            tools.Filter(
                lambda x: not x['attributes']['bibliographic'],
                ctx.contributors,
            ),
        ),
    )
예제 #30
0
class ContributorPerson(Parser):
    """Parser declaring a ``Person`` from ``ctx.contributorName``."""

    schema = 'Person'

    # Each name part is an attribute of
    # ``tools.ParseName(ctx.contributorName)``.
    suffix = tools.ParseName(ctx.contributorName).suffix
    family_name = tools.ParseName(ctx.contributorName).last
    given_name = tools.ParseName(ctx.contributorName).first
    additional_name = tools.ParseName(ctx.contributorName).middle
    identifiers = tools.Map(
        tools.Delegate(ThroughIdentifiers),
        tools.Try(ctx.nameIdentifier),
    )

    class Extra:
        """Name-identifier and contributor-type values read from ``ctx``."""

        name_identifier = tools.Try(
            tools.RunPython(force_text, ctx.nameIdentifier),
        )
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'],
        )
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'],
        )
        contributor_type = tools.Try(ctx.contributorType)